From d4d1c05b32f6702c8ba9490dc58a6dc80400276e Mon Sep 17 00:00:00 2001 From: mnehete32 Date: Sat, 1 Nov 2025 16:22:41 +0530 Subject: [PATCH 1/5] CUDA: add FLOOR, CEIL, ROUND, TRUNC unary ops --- ggml/src/ggml-cuda/ggml-cuda.cu | 16 ++++++++++++++++ ggml/src/ggml-cuda/unary.cu | 32 ++++++++++++++++++++++++++++++++ ggml/src/ggml-cuda/unary.cuh | 8 ++++++++ 3 files changed, 56 insertions(+) diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index 61a8f1df87de1..5667ec0c4d709 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -2499,6 +2499,18 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg case GGML_UNARY_OP_XIELU: ggml_cuda_op_xielu(ctx, dst); break; + case GGML_UNARY_OP_FLOOR: + ggml_cuda_op_floor(ctx, dst); + break; + case GGML_UNARY_OP_CEIL: + ggml_cuda_op_ceil(ctx, dst); + break; + case GGML_UNARY_OP_ROUND: + ggml_cuda_op_round(ctx, dst); + break; + case GGML_UNARY_OP_TRUNC: + ggml_cuda_op_trunc(ctx, dst); + break; default: return false; } @@ -3769,6 +3781,10 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g case GGML_UNARY_OP_TANH: case GGML_UNARY_OP_EXP: case GGML_UNARY_OP_ELU: + case GGML_UNARY_OP_FLOOR: + case GGML_UNARY_OP_CEIL: + case GGML_UNARY_OP_ROUND: + case GGML_UNARY_OP_TRUNC: return ggml_is_contiguous(op->src[0]); default: return false; diff --git a/ggml/src/ggml-cuda/unary.cu b/ggml/src/ggml-cuda/unary.cu index 5f0d3a6726aef..c1dc6ddbf8f81 100644 --- a/ggml/src/ggml-cuda/unary.cu +++ b/ggml/src/ggml-cuda/unary.cu @@ -85,6 +85,22 @@ static __device__ __forceinline__ float op_elu(float x) { return (x > 0.f) ? x : expm1f(x); } +static __device__ __forceinline__ float op_floor(float x) { + return floorf(x); +} + +static __device__ __forceinline__ float op_ceil(float x) { + return ceilf(x); +} + +static __device__ __forceinline__ float op_round(float x) { + return round(x); +} + +static __device__ __forceinline__ float op_trunc(float x) { + return trunc(x); +} + template static __global__ void unary_op_kernel(const T * x, T * dst, const int k) { const int i = blockDim.x*blockIdx.x + threadIdx.x; @@ -201,6 +217,22 @@ void ggml_cuda_op_log(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { void ggml_cuda_op_elu(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { ggml_cuda_op_unary(ctx, dst); } + +void ggml_cuda_op_floor(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + ggml_cuda_op_unary(ctx, dst); +} + +void ggml_cuda_op_ceil(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + ggml_cuda_op_unary(ctx, dst); +} + +void ggml_cuda_op_round(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + ggml_cuda_op_unary(ctx, dst); +} + +void ggml_cuda_op_trunc(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + ggml_cuda_op_unary(ctx, dst); +} /* gated ops */ template diff --git a/ggml/src/ggml-cuda/unary.cuh b/ggml/src/ggml-cuda/unary.cuh index 6c738cefecfd2..2800c75ba3f7a 100644 --- a/ggml/src/ggml-cuda/unary.cuh +++ b/ggml/src/ggml-cuda/unary.cuh @@ -63,6 +63,14 @@ void ggml_cuda_op_log(ggml_backend_cuda_context & ctx, ggml_tensor * dst); void ggml_cuda_op_elu(ggml_backend_cuda_context & ctx, ggml_tensor * dst); +void ggml_cuda_op_floor(ggml_backend_cuda_context & ctx, ggml_tensor * dst); + +void ggml_cuda_op_ceil(ggml_backend_cuda_context & ctx, ggml_tensor * dst); + +void ggml_cuda_op_round(ggml_backend_cuda_context & ctx, ggml_tensor * dst); + +void ggml_cuda_op_trunc(ggml_backend_cuda_context & ctx, ggml_tensor * dst); + void ggml_cuda_op_reglu(ggml_backend_cuda_context & ctx, ggml_tensor * dst); void ggml_cuda_op_geglu(ggml_backend_cuda_context & ctx, ggml_tensor * dst); From 48c4ace4dac8ee1970496c6985878b23cd2ae6f9 Mon Sep 17 00:00:00 2001 From: mnehete32 Date: Tue, 4 Nov 2025 21:56:31 +0530 Subject: [PATCH 2/5] update cuda ops --- docs/ops.md | 24 +- docs/ops/CUDA.csv | 15775 ++++++++++++++++++++++++++++++++------------ 2 files changed, 11461 insertions(+), 4338 deletions(-) diff --git a/docs/ops.md b/docs/ops.md index 3738a48072832..9c6d698c656b8 100644 --- a/docs/ops.md +++ b/docs/ops.md @@ -18,7 +18,7 @@ Legend: | ACC | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | ADD | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ | | ADD1 | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | -| ADD_ID | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| ADD_ID | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | | ARANGE | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | | ARGMAX | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | ARGSORT | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | @@ -26,7 +26,7 @@ Legend: | CLAMP | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | 🟡 | ❌ | | CONCAT | ❌ | ✅ | ✅ | 🟡 | ✅ | 🟡 | 🟡 | ✅ | ❌ | | CONT | ❌ | 🟡 | ✅ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | ❌ | -| CONV_2D | ❌ | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | ✅ | ❌ | +| CONV_2D | ❌ | ❌ | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ | ❌ | | CONV_2D_DW | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | | CONV_3D | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | | CONV_TRANSPOSE_1D | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | @@ -53,11 +53,11 @@ Legend: | GET_ROWS | ❌ | 🟡 | ✅ | 🟡 | ✅ | 🟡 | 🟡 | 🟡 | ❌ | | GET_ROWS_BACK | ❌ | ❌ | 🟡 | 🟡 | ❌ | ❌ | ❌ | ❌ | ❌ | | GROUP_NORM | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | -| GROUP_NORM_MUL_ADD | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | +| GROUP_NORM_MUL_ADD | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | | HARDSIGMOID | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ | | HARDSWISH | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ | | IM2COL | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | ✅ | ❌ | -| IM2COL_3D | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| IM2COL_3D | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | | L2_NORM | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | LEAKY_RELU | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | LOG | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | @@ -67,12 +67,12 @@ Legend: | MUL_MAT_ID | ❌ | 🟡 | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ❌ | | NEG | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ | | NORM | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ | -| NORM_MUL_ADD | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | +| NORM_MUL_ADD | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | | OPT_STEP_ADAMW | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | -| OPT_STEP_SGD | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| OPT_STEP_SGD | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | | OUT_PROD | 🟡 | ❌ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ❌ | ❌ | -| PAD | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | 🟡 | ✅ | ❌ | -| PAD_REFLECT_1D | ❌ | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ | +| PAD | ❌ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ✅ | ❌ | +| PAD_REFLECT_1D | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | | POOL_2D | ❌ | 🟡 | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | REGLU | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ | | RELU | ❌ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | ❌ | @@ -81,7 +81,7 @@ Legend: | RMS_NORM | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | ✅ | ❌ | | RMS_NORM_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | | RMS_NORM_MUL_ADD | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | -| ROLL | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | +| ROLL | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | | ROPE | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | | ROPE_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | | ROUND | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | @@ -95,7 +95,7 @@ Legend: | SILU | ❌ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | ❌ | | SILU_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | | SIN | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | ✅ | 🟡 | ❌ | -| SOFTCAP | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | +| SOFTCAP | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | | SOFT_MAX | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | | SOFT_MAX_BACK | ❌ | ❌ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ✅ | ❌ | | SQR | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | ✅ | 🟡 | ❌ | @@ -105,9 +105,9 @@ Legend: | STEP | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ | | SUB | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ | | SUM | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | -| SUM_ROWS | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | 🟡 | ✅ | ❌ | +| SUM_ROWS | ❌ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ✅ | ❌ | | SWIGLU | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ | -| SWIGLU_OAI | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| SWIGLU_OAI | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | | TANH | ❌ | ✅ | ✅ | 🟡 | 🟡 | ✅ | 🟡 | 🟡 | ❌ | | TIMESTEP_EMBEDDING | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | | TOPK_MOE | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | diff --git a/docs/ops/CUDA.csv b/docs/ops/CUDA.csv index 71e47977e31d1..723be69d81d74 100644 --- a/docs/ops/CUDA.csv +++ b/docs/ops/CUDA.csv @@ -239,99 +239,117 @@ "CUDA0","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=1,swapped=1","support","1","yes","CUDA" "CUDA0","GEGLU_QUICK","type=f32,ne_a=[128,2,2,2],v=1,split","support","1","yes","CUDA" "CUDA0","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=1,split","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=f32,n=1,m=8,r=2,b=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA" +"CUDA0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=0,alpha=0.500000,limit=2.000000","support","1","yes","CUDA" +"CUDA0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=0,alpha=0.500000,limit=7.000000","support","1","yes","CUDA" +"CUDA0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=0,alpha=1.702000,limit=2.000000","support","1","yes","CUDA" +"CUDA0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=0,alpha=1.702000,limit=7.000000","support","1","yes","CUDA" +"CUDA0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=1,alpha=0.500000,limit=2.000000","support","1","yes","CUDA" +"CUDA0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=1,alpha=0.500000,limit=7.000000","support","1","yes","CUDA" +"CUDA0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=1,alpha=1.702000,limit=2.000000","support","1","yes","CUDA" +"CUDA0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=1,alpha=1.702000,limit=7.000000","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f32,n=76800,m=5,r=4,be1=1,be2=2,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f32,n=256,m=80000,r=70000,be1=2,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,be1=700,be2=100,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q4_0,n=76800,m=5,r=4,be1=1,be2=2,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q4_0,n=256,m=80000,r=70000,be1=2,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,be1=700,be2=100,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f32,n=1,m=8,r=2,be1=1,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=mxfp4,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=mxfp4,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=mxfp4,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=mxfp4,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA" "CUDA0","GET_ROWS_BACK","type=f32,n=1,m=8,r=2,b=1,v=0","support","1","yes","CUDA" "CUDA0","GET_ROWS_BACK","type=f32,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA" "CUDA0","GET_ROWS_BACK","type=f32,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA" @@ -349,6 +367,8 @@ "CUDA0","GET_ROWS_BACK","type=q5_1,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" "CUDA0","GET_ROWS_BACK","type=q8_0,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" "CUDA0","GET_ROWS_BACK","type=q8_0,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS_BACK","type=mxfp4,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS_BACK","type=mxfp4,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" "CUDA0","GET_ROWS_BACK","type=q2_K,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" "CUDA0","GET_ROWS_BACK","type=q2_K,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" "CUDA0","GET_ROWS_BACK","type=q3_K,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" @@ -500,6 +520,18 @@ "CUDA0","SET_ROWS","type=q8_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" "CUDA0","SET_ROWS","type=q8_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" "CUDA0","SET_ROWS","type=q8_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=mxfp4,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=mxfp4,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=mxfp4,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=mxfp4,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=mxfp4,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=mxfp4,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=mxfp4,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=mxfp4,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=mxfp4,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=mxfp4,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=mxfp4,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=mxfp4,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" "CUDA0","SET_ROWS","type=q2_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" "CUDA0","SET_ROWS","type=q2_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" "CUDA0","SET_ROWS","type=q2_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" @@ -883,794 +915,3887 @@ "CUDA0","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,1,2560],ne_kernel=[3,3,1,2560],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","yes","CUDA" "CUDA0","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,2,2560],ne_kernel=[3,3,2,2560],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","yes","CUDA" "CUDA0","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[5,5,1,32],ne_kernel=[3,4,1,32],s0=1,s1=1,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f16,dst_type=f32,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" "CUDA0","CONV_2D_DW","ne_input=[17,34,9,1],ne_kernel=[3,3,1,9],stride=1,padding=0,dilation=1,cwhn=0","support","1","yes","CUDA" "CUDA0","CONV_2D_DW","ne_input=[17,34,9,1],ne_kernel=[3,3,1,9],stride=1,padding=0,dilation=1,cwhn=1","support","1","yes","CUDA" "CUDA0","CONV_2D_DW","ne_input=[32,8,64,1],ne_kernel=[3,3,1,64],stride=2,padding=1,dilation=1,cwhn=0","support","1","yes","CUDA" "CUDA0","CONV_2D_DW","ne_input=[32,8,64,1],ne_kernel=[3,3,1,64],stride=2,padding=1,dilation=1,cwhn=1","support","1","yes","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=4,ID=8,IH=8,IW=8,OC=8,KD=1,KH=1,KW=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=4,ID=8,IH=8,IW=8,OC=8,KD=1,KH=1,KW=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" "CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,1,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA" "CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,1,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA" "CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,1,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA" @@ -1792,6 +4917,7 @@ "CUDA0","COUNT_EQUAL","type=f32,ne=[4,500,1,1]","support","1","yes","CUDA" "CUDA0","COUNT_EQUAL","type=f32,ne=[4,5000,1,1]","support","1","yes","CUDA" "CUDA0","ARGMAX","type=f32,ne=[32,1,1,1]","support","1","yes","CUDA" +"CUDA0","ARGMAX","type=f32,ne=[32,513,1,1]","support","1","yes","CUDA" "CUDA0","ARGMAX","type=f32,ne=[100,10,1,1]","support","1","yes","CUDA" "CUDA0","ARGMAX","type=f32,ne=[1024,10,1,1]","support","1","yes","CUDA" "CUDA0","ARGMAX","type=f32,ne=[1024,12,1,1]","support","1","yes","CUDA" @@ -1909,6 +5035,15 @@ "CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA" "CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA" +"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" +"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA" +"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" +"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA" +"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" +"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA" "CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA" "CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA" @@ -2051,6 +5186,8 @@ "CUDA0","CPY","type_src=f16,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=f16,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=f16,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" +"CUDA0","CPY","type_src=f16,type_dst=mxfp4,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" +"CUDA0","CPY","type_src=f16,type_dst=mxfp4,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=f16,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=f16,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=f16,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" @@ -2095,6 +5232,8 @@ "CUDA0","CPY","type_src=bf16,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=bf16,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=bf16,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" +"CUDA0","CPY","type_src=bf16,type_dst=mxfp4,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" +"CUDA0","CPY","type_src=bf16,type_dst=mxfp4,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=bf16,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=bf16,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=bf16,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" @@ -2139,6 +5278,8 @@ "CUDA0","CPY","type_src=f32,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA" "CUDA0","CPY","type_src=f32,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA" "CUDA0","CPY","type_src=f32,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA" +"CUDA0","CPY","type_src=f32,type_dst=mxfp4,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" +"CUDA0","CPY","type_src=f32,type_dst=mxfp4,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=f32,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=f32,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=f32,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" @@ -2183,6 +5324,8 @@ "CUDA0","CPY","type_src=q5_1,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA" "CUDA0","CPY","type_src=q8_0,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA" "CUDA0","CPY","type_src=q8_0,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA" +"CUDA0","CPY","type_src=mxfp4,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" +"CUDA0","CPY","type_src=mxfp4,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=q2_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=q2_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=q3_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" @@ -2215,6 +5358,10 @@ "CUDA0","CPY","type_src=f16,type_dst=f32,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA" "CUDA0","CPY","type_src=f32,type_dst=f16,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA" "CUDA0","CPY","type_src=f32,type_dst=f32,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA" +"CUDA0","CPY","type_src=f32,type_dst=i32,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA" +"CUDA0","CPY","type_src=f32,type_dst=i32,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA" +"CUDA0","CPY","type_src=i32,type_dst=f32,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA" +"CUDA0","CPY","type_src=i32,type_dst=f32,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA" "CUDA0","CONT","type=f32,ne=[10,10,10,1]","support","1","yes","CUDA" "CUDA0","CONT","type=f32,ne=[2,1,1,1]","support","1","yes","CUDA" "CUDA0","CONT","type=f32,ne=[2,1,3,5]","support","1","yes","CUDA" @@ -2277,6 +5424,10 @@ "CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA" "CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA" "CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f16,ne=[1,1,65536,1],nr=[256,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f16,ne=[1,1,65536,1],nr=[256,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f16,ne=[1,1,65536,1],nr=[256,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f16,ne=[1,1,65536,1],nr=[256,1,1,1],nf=1","support","1","yes","CUDA" "CUDA0","ADD","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" "CUDA0","SUB","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" "CUDA0","MUL","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" @@ -2381,6 +5532,10 @@ "CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA" "CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA" "CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[1,1,65536,1],nr=[256,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f32,ne=[1,1,65536,1],nr=[256,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f32,ne=[1,1,65536,1],nr=[256,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f32,ne=[1,1,65536,1],nr=[256,1,1,1],nf=1","support","1","yes","CUDA" "CUDA0","ADD","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" "CUDA0","SUB","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" "CUDA0","MUL","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" @@ -2440,9 +5595,11 @@ "CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,2,2],nf=6","support","1","yes","CUDA" "CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,2,2,2],nf=7","support","1","yes","CUDA" "CUDA0","ADD","type=f32,ne=[16,5,4,3],nr=[2,2,2,2],nf=8","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[16,5,4,3],nr=[1,1,1,1],nf=16","support","1","yes","CUDA" "CUDA0","ADD1","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA" "CUDA0","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=0.000000","support","1","yes","CUDA" "CUDA0","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=1.000000","support","1","yes","CUDA" +"CUDA0","SOFTCAP","type=f32,ne=[10,10,10,10],softcap=50.000000","support","1","yes","CUDA" "CUDA0","SILU_BACK","type=f32,ne=[64,5,4,3],eps=0.000001","support","1","yes","CUDA" "CUDA0","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000000","support","1","yes","CUDA" "CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000000","support","1","yes","CUDA" @@ -2468,16 +5625,36 @@ "CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.100000","support","1","yes","CUDA" "CUDA0","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.100000","support","1","yes","CUDA" "CUDA0","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","CUDA" -"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000000,broadcast=0","support","1","yes","CUDA" -"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000000,broadcast=1","support","1","yes","CUDA" -"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000001,broadcast=0","support","1","yes","CUDA" -"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000001,broadcast=1","support","1","yes","CUDA" -"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000100,broadcast=0","support","1","yes","CUDA" -"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000100,broadcast=1","support","1","yes","CUDA" -"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.100000,broadcast=0","support","1","yes","CUDA" -"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.100000,broadcast=1","support","1","yes","CUDA" -"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=1.000000,broadcast=0","support","1","yes","CUDA" -"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=1.000000,broadcast=1","support","1","yes","CUDA" +"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000000,broadcast=0,multi_add=0","support","1","yes","CUDA" +"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000000,broadcast=1,multi_add=0","support","1","yes","CUDA" +"CUDA0","NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000000,broadcast=0","support","1","yes","CUDA" +"CUDA0","NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000000,broadcast=1","support","1","yes","CUDA" +"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000001,broadcast=0,multi_add=0","support","1","yes","CUDA" +"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000001,broadcast=1,multi_add=0","support","1","yes","CUDA" +"CUDA0","NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000001,broadcast=0","support","1","yes","CUDA" +"CUDA0","NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000001,broadcast=1","support","1","yes","CUDA" +"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000100,broadcast=0,multi_add=0","support","1","yes","CUDA" +"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000100,broadcast=1,multi_add=0","support","1","yes","CUDA" +"CUDA0","NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000100,broadcast=0","support","1","yes","CUDA" +"CUDA0","NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000100,broadcast=1","support","1","yes","CUDA" +"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.100000,broadcast=0,multi_add=0","support","1","yes","CUDA" +"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.100000,broadcast=1,multi_add=0","support","1","yes","CUDA" +"CUDA0","NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.100000,broadcast=0","support","1","yes","CUDA" +"CUDA0","NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.100000,broadcast=1","support","1","yes","CUDA" +"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=1.000000,broadcast=0,multi_add=0","support","1","yes","CUDA" +"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=1.000000,broadcast=1,multi_add=0","support","1","yes","CUDA" +"CUDA0","NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=1.000000,broadcast=0","support","1","yes","CUDA" +"CUDA0","NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=1.000000,broadcast=1","support","1","yes","CUDA" +"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[1,1,1,1],eps=0.000001,broadcast=0,multi_add=0","support","1","yes","CUDA" +"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[1,1,1,1],eps=0.000001,broadcast=0,multi_add=1","support","1","yes","CUDA" +"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[511,1,1,1],eps=0.000001,broadcast=0,multi_add=0","support","1","yes","CUDA" +"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[511,1,1,1],eps=0.000001,broadcast=0,multi_add=1","support","1","yes","CUDA" +"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[1025,1,1,1],eps=0.000001,broadcast=0,multi_add=0","support","1","yes","CUDA" +"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[1025,1,1,1],eps=0.000001,broadcast=0,multi_add=1","support","1","yes","CUDA" +"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[8192,1,1,1],eps=0.000001,broadcast=0,multi_add=0","support","1","yes","CUDA" +"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[8192,1,1,1],eps=0.000001,broadcast=0,multi_add=1","support","1","yes","CUDA" +"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[16896,1,1,1],eps=0.000001,broadcast=0,multi_add=0","support","1","yes","CUDA" +"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[16896,1,1,1],eps=0.000001,broadcast=0,multi_add=1","support","1","yes","CUDA" "CUDA0","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","CUDA" "CUDA0","SSM_CONV","type=f32,ne_a=[4,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","CUDA" "CUDA0","SSM_CONV","type=f32,ne_a=[8,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","CUDA" @@ -2512,1243 +5689,1739 @@ "CUDA0","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=1","support","1","yes","CUDA" "CUDA0","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=4","support","1","yes","CUDA" "CUDA0","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=128,n_seqs=4","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=1,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=64,n=2,k=128,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=83,n=2,k=128,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=64,n=2,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=83,n=2,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=64,n=45,k=128,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=45,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=193,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=67,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=0,m=32,n=1024,k=16","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=1,m=32,n=1024,k=16","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q5_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q5_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q5_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q5_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q2_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q2_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q3_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q3_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q5_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q5_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q6_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q6_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq3_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq3_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq1_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq1_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq1_m,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq1_m,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq4_nl,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq4_nl,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq3_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq3_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq4_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq4_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=bf16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=bf16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=1,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=64,n=2,k=128,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=83,n=2,k=128,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=64,n=2,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=83,n=2,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=64,n=45,k=128,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=45,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=193,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=67,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=32,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=3","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=0,m=32,n=1024,k=16,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=2,n_used=2,b=0,m=32,n=8192,k=64,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=0,m=50,n=200,k=64,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=1,m=32,n=1024,k=16,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=2,n_used=2,b=1,m=32,n=8192,k=64,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=1,m=50,n=200,k=64,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=1,n_used=1,b=0,m=8,n=16,k=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=0,m=32,n=32,k=32,o=3","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q5_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q5_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q5_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q5_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q2_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q2_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q3_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q3_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q5_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q5_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q6_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q6_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq3_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq3_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq1_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq1_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq1_m,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq1_m,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq4_nl,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq4_nl,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq3_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq3_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq4_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq4_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=bf16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=bf16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" "CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","1","yes","CUDA" "CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","1","yes","CUDA" "CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","1","yes","CUDA" @@ -4517,6 +8190,134 @@ "CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA" "CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA" "CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA" "CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA" "CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA" "CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA" @@ -4645,6 +8446,42 @@ "CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA" "CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA" "CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=1,n_token=1","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=1,n_token=32","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=1,n_token=129","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=1,n_token=1","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=1,n_token=32","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=1,n_token=129","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=2,n_token=1","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=2,n_token=32","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=2,n_token=129","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=2,n_token=1","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=2,n_token=32","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=2,n_token=129","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=4,n_token=1","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=4,n_token=32","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=4,n_token=129","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=4,n_token=1","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=4,n_token=32","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=4,n_token=129","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=1,n_token=1","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=1,n_token=32","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=1,n_token=129","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=1,n_token=1","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=1,n_token=32","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=1,n_token=129","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=2,n_token=1","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=2,n_token=32","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=2,n_token=129","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=2,n_token=1","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=2,n_token=32","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=2,n_token=129","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=4,n_token=1","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=4,n_token=32","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=4,n_token=129","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=4,n_token=1","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=4,n_token=32","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=4,n_token=129","support","1","yes","CUDA" "CUDA0","SQR","type=f16,ne=[10,5,4,3]","support","1","yes","CUDA" "CUDA0","SQRT","type=f16,ne=[10,3,3,2]","support","1","yes","CUDA" "CUDA0","LOG","type=f16,ne=[10,5,4,3]","support","1","yes","CUDA" @@ -4660,141 +8497,253 @@ "CUDA0","DIAG_MASK_INF","type=f32,ne=[10,10,1,1],n_past=5","support","1","yes","CUDA" "CUDA0","DIAG_MASK_INF","type=f32,ne=[10,10,3,1],n_past=5","support","1","yes","CUDA" "CUDA0","DIAG_MASK_INF","type=f32,ne=[10,10,3,2],n_past=5","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" +"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,2,3],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" +"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,2,3],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" +"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,2,3],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" +"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,2,3],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" +"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,2,3],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" +"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,2,3],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" +"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,2,3],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" +"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,2,3],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" "CUDA0","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" "CUDA0","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" @@ -5222,9 +9171,11 @@ "CUDA0","ARGSORT","type=f32,ne=[8,1,1,1],order=0","support","1","yes","CUDA" "CUDA0","ARGSORT","type=f32,ne=[16,10,10,10],order=0","support","1","yes","CUDA" "CUDA0","ARGSORT","type=f32,ne=[60,10,10,10],order=0","support","1","yes","CUDA" +"CUDA0","ARGSORT","type=f32,ne=[1024,1,1,1],order=0","support","1","yes","CUDA" "CUDA0","ARGSORT","type=f32,ne=[8,1,1,1],order=1","support","1","yes","CUDA" "CUDA0","ARGSORT","type=f32,ne=[16,10,10,10],order=1","support","1","yes","CUDA" "CUDA0","ARGSORT","type=f32,ne=[60,10,10,10],order=1","support","1","yes","CUDA" +"CUDA0","ARGSORT","type=f32,ne=[1024,1,1,1],order=1","support","1","yes","CUDA" "CUDA0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=0","support","1","yes","CUDA" "CUDA0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=1","support","1","yes","CUDA" "CUDA0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=0","support","1","yes","CUDA" @@ -5235,2115 +9186,5287 @@ "CUDA0","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=1","support","1","yes","CUDA" "CUDA0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=257","support","1","yes","CUDA" "CUDA0","SUM","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA" -"CUDA0","SUM_ROWS","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA" +"CUDA0","SUM_ROWS","type=f32,ne=[10,5,4,3],permute=0,slice=0","support","1","yes","CUDA" +"CUDA0","SUM_ROWS","type=f32,ne=[11,5,6,3],permute=1,slice=0","support","0","no","CUDA" +"CUDA0","SUM_ROWS","type=f32,ne=[11,5,6,3],permute=0,slice=1","support","0","no","CUDA" +"CUDA0","SUM_ROWS","type=f32,ne=[11,5,6,3],permute=1,slice=1","support","0","no","CUDA" "CUDA0","MEAN","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA" +"CUDA0","SUM","type=f32,ne=[33,1,1,1]","support","1","yes","CUDA" +"CUDA0","SUM_ROWS","type=f32,ne=[33,1,1,1],permute=0,slice=0","support","1","yes","CUDA" +"CUDA0","MEAN","type=f32,ne=[33,1,1,1]","support","1","yes","CUDA" +"CUDA0","SUM","type=f32,ne=[33,1024,1,1]","support","1","yes","CUDA" +"CUDA0","SUM_ROWS","type=f32,ne=[33,1024,1,1],permute=0,slice=0","support","1","yes","CUDA" +"CUDA0","SUM","type=f32,ne=[33,256,1,1]","support","1","yes","CUDA" +"CUDA0","SUM_ROWS","type=f32,ne=[33,256,1,1],permute=0,slice=0","support","1","yes","CUDA" +"CUDA0","MEAN","type=f32,ne=[33,256,1,1]","support","1","yes","CUDA" +"CUDA0","MEAN","type=f32,ne=[32769,1,1,1]","support","1","yes","CUDA" "CUDA0","GROUP_NORM","type=f32,ne=[64,64,320,1],num_groups=32,eps=0.000001","support","1","yes","CUDA" "CUDA0","GROUP_NORM","type=f32,ne=[9,9,1280,1],num_groups=32,eps=0.000001","support","1","yes","CUDA" +"CUDA0","GROUP_NORM_MUL_ADD","type=f32,ne=[64,64,320,1],num_groups=4,eps=0.000010","support","1","yes","CUDA" +"CUDA0","GROUP_NORM_MUL_ADD","type=f32,ne=[9,9,1280,1],num_groups=4,eps=0.000010","support","1","yes","CUDA" "CUDA0","ACC","type=f32,ne_a=[256,17,1,1],ne_b=[256,16,1,1]","support","1","yes","CUDA" "CUDA0","PAD","type=f32,ne_a=[512,512,1,1],pad_0=1,pad_1=1","support","1","yes","CUDA" -"CUDA0","PAD_REFLECT_1D","type=f32,ne_a=[512,34,2,1],pad_0=10,pad_1=9","support","0","no","CUDA" -"CUDA0","ROLL","shift0=3,shift1=-2,shift3=1,shift4=-1","support","0","no","CUDA" +"CUDA0","PAD","type=f32,ne_a=[512,512,3,1],lp0=1,rp0=1,lp1=1,rp1=1,lp2=1,rp2=1,lp3=1,rp3=1,v=0","support","1","yes","CUDA" +"CUDA0","PAD_REFLECT_1D","type=f32,ne_a=[512,34,2,1],pad_0=10,pad_1=9","support","1","yes","CUDA" +"CUDA0","ROLL","shift0=3,shift1=-2,shift3=1,shift4=-1","support","1","yes","CUDA" "CUDA0","ARANGE","type=f32,start=0.000000,stop=10.000000,step=1.000000","support","1","yes","CUDA" "CUDA0","TIMESTEP_EMBEDDING","type=f32,ne_a=[2,1,1,1],dim=320,max_period=10000","support","1","yes","CUDA" "CUDA0","LEAKY_RELU","type=f32,ne_a=[10,5,4,3],negative_slope=0.100000","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0","support","1","yes","CUDA" +"CUDA0","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0","support","1","yes","CUDA" +"CUDA0","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1","support","0","no","CUDA" +"CUDA0","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=1","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" "CUDA0","CROSS_ENTROPY_LOSS","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA" "CUDA0","CROSS_ENTROPY_LOSS","type=f32,ne=[30000,1,1,1]","support","1","yes","CUDA" "CUDA0","CROSS_ENTROPY_LOSS_BACK","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA" "CUDA0","CROSS_ENTROPY_LOSS_BACK","type=f32,ne=[30000,1,1,1]","support","1","yes","CUDA" "CUDA0","OPT_STEP_ADAMW","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA" +"CUDA0","OPT_STEP_SGD","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA" From 4ed928bacbd63da456b173b924309cbf647fd3c9 Mon Sep 17 00:00:00 2001 From: mnehete32 Date: Tue, 4 Nov 2025 21:56:31 +0530 Subject: [PATCH 3/5] docs: update cuda ops --- docs/ops.md | 40 +- docs/ops/CUDA.csv | 21162 +++++++++++++++++++++++++++++++++----------- 2 files changed, 15944 insertions(+), 5258 deletions(-) diff --git a/docs/ops.md b/docs/ops.md index 3738a48072832..cc89c7aa8b36d 100644 --- a/docs/ops.md +++ b/docs/ops.md @@ -16,17 +16,17 @@ Legend: |-----------|------|------|------|------|------|------|------|------|------| | ABS | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ | | ACC | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | -| ADD | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ | +| ADD | ❌ | ✅ | ✅ | ❌ | 🟡 | 🟡 | ✅ | ✅ | ❌ | | ADD1 | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | -| ADD_ID | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| ADD_ID | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | | ARANGE | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | | ARGMAX | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | ARGSORT | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | -| CEIL | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | +| CEIL | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | ✅ | ❌ | ❌ | | CLAMP | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | 🟡 | ❌ | | CONCAT | ❌ | ✅ | ✅ | 🟡 | ✅ | 🟡 | 🟡 | ✅ | ❌ | | CONT | ❌ | 🟡 | ✅ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | ❌ | -| CONV_2D | ❌ | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | ✅ | ❌ | +| CONV_2D | ❌ | ❌ | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ | ❌ | | CONV_2D_DW | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | | CONV_3D | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | | CONV_TRANSPOSE_1D | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | @@ -37,12 +37,12 @@ Legend: | CROSS_ENTROPY_LOSS | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | | CROSS_ENTROPY_LOSS_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | | DIAG_MASK_INF | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ | -| DIV | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ | +| DIV | ❌ | ✅ | ✅ | ❌ | 🟡 | 🟡 | ✅ | ✅ | ❌ | | DUP | ❌ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | ✅ | 🟡 | ❌ | | ELU | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ | | EXP | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ | | FLASH_ATTN_EXT | ❌ | 🟡 | ✅ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ❌ | -| FLOOR | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | +| FLOOR | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | ✅ | ❌ | ❌ | | GATED_LINEAR_ATTN | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | | GEGLU | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ | | GEGLU_ERF | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ | @@ -57,22 +57,22 @@ Legend: | HARDSIGMOID | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ | | HARDSWISH | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ | | IM2COL | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | ✅ | ❌ | -| IM2COL_3D | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| IM2COL_3D | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | | L2_NORM | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | LEAKY_RELU | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | LOG | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | | MEAN | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | -| MUL | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ | +| MUL | ❌ | ✅ | ✅ | ❌ | 🟡 | 🟡 | ✅ | ✅ | ❌ | | MUL_MAT | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | | MUL_MAT_ID | ❌ | 🟡 | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ❌ | | NEG | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ | | NORM | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ | | NORM_MUL_ADD | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | | OPT_STEP_ADAMW | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | -| OPT_STEP_SGD | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| OPT_STEP_SGD | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | | OUT_PROD | 🟡 | ❌ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ❌ | ❌ | -| PAD | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | 🟡 | ✅ | ❌ | -| PAD_REFLECT_1D | ❌ | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ | +| PAD | ❌ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ✅ | ❌ | +| PAD_REFLECT_1D | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | | POOL_2D | ❌ | 🟡 | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | REGLU | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ | | RELU | ❌ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | ❌ | @@ -80,15 +80,15 @@ Legend: | REPEAT_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | | RMS_NORM | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | ✅ | ❌ | | RMS_NORM_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | -| RMS_NORM_MUL_ADD | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | -| ROLL | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | +| RMS_NORM_MUL_ADD | ❌ | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | +| ROLL | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | | ROPE | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | | ROPE_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | -| ROUND | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | +| ROUND | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | ✅ | ❌ | ❌ | | RWKV_WKV6 | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | RWKV_WKV7 | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | SCALE | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | -| SET | ❌ | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | +| SET | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | | SET_ROWS | ❌ | ❌ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | ❌ | | SGN | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ | | SIGMOID | ❌ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | ❌ | @@ -103,14 +103,14 @@ Legend: | SSM_CONV | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | | SSM_SCAN | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | | STEP | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ | -| SUB | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ | -| SUM | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | -| SUM_ROWS | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | 🟡 | ✅ | ❌ | +| SUB | ❌ | ✅ | ✅ | ❌ | 🟡 | 🟡 | ✅ | ✅ | ❌ | +| SUM | ❌ | ✅ | ✅ | 🟡 | ❌ | ❌ | ✅ | ✅ | ❌ | +| SUM_ROWS | ❌ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ✅ | ❌ | | SWIGLU | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ | -| SWIGLU_OAI | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| SWIGLU_OAI | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | | TANH | ❌ | ✅ | ✅ | 🟡 | 🟡 | ✅ | 🟡 | 🟡 | ❌ | | TIMESTEP_EMBEDDING | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | | TOPK_MOE | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | -| TRUNC | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | +| TRUNC | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | ✅ | ❌ | ❌ | | UPSCALE | ❌ | 🟡 | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | ❌ | | XIELU | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | diff --git a/docs/ops/CUDA.csv b/docs/ops/CUDA.csv index 71e47977e31d1..8eb31f22d2c08 100644 --- a/docs/ops/CUDA.csv +++ b/docs/ops/CUDA.csv @@ -29,6 +29,16 @@ "CUDA0","EXP","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" "CUDA0","GELU_ERF","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" "CUDA0","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" +"CUDA0","XIELU","type=f16,ne_a=[128,2,2,2],v=0","support","0","no","CUDA" +"CUDA0","XIELU","type=f16,ne_a=[5,7,11,13],v=0","support","0","no","CUDA" +"CUDA0","FLOOR","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" +"CUDA0","FLOOR","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" +"CUDA0","CEIL","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" +"CUDA0","CEIL","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" +"CUDA0","ROUND","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" +"CUDA0","ROUND","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" +"CUDA0","TRUNC","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" +"CUDA0","TRUNC","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" "CUDA0","ABS","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" "CUDA0","ABS","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" "CUDA0","SGN","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" @@ -59,6 +69,16 @@ "CUDA0","EXP","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" "CUDA0","GELU_ERF","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" "CUDA0","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" +"CUDA0","XIELU","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" +"CUDA0","XIELU","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" +"CUDA0","FLOOR","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" +"CUDA0","FLOOR","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" +"CUDA0","CEIL","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" +"CUDA0","CEIL","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" +"CUDA0","ROUND","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" +"CUDA0","ROUND","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" +"CUDA0","TRUNC","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" +"CUDA0","TRUNC","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" "CUDA0","ABS","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" "CUDA0","ABS","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" "CUDA0","SGN","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" @@ -89,6 +109,16 @@ "CUDA0","EXP","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" "CUDA0","GELU_ERF","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" "CUDA0","GELU_ERF","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" +"CUDA0","XIELU","type=f32,ne_a=[128,2,2,2],v=0","support","0","no","CUDA" +"CUDA0","XIELU","type=f32,ne_a=[5,7,11,13],v=0","support","0","no","CUDA" +"CUDA0","FLOOR","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" +"CUDA0","FLOOR","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" +"CUDA0","CEIL","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" +"CUDA0","CEIL","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" +"CUDA0","ROUND","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" +"CUDA0","ROUND","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" +"CUDA0","TRUNC","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" +"CUDA0","TRUNC","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" "CUDA0","ABS","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" "CUDA0","ABS","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" "CUDA0","SGN","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" @@ -119,6 +149,16 @@ "CUDA0","EXP","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" "CUDA0","GELU_ERF","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" "CUDA0","GELU_ERF","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" +"CUDA0","XIELU","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" +"CUDA0","XIELU","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" +"CUDA0","FLOOR","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" +"CUDA0","FLOOR","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" +"CUDA0","CEIL","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" +"CUDA0","CEIL","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" +"CUDA0","ROUND","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" +"CUDA0","ROUND","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" +"CUDA0","TRUNC","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" +"CUDA0","TRUNC","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" "CUDA0","REGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=0","support","1","yes","CUDA" "CUDA0","REGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=0","support","1","yes","CUDA" "CUDA0","REGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=1","support","1","yes","CUDA" @@ -239,99 +279,117 @@ "CUDA0","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=1,swapped=1","support","1","yes","CUDA" "CUDA0","GEGLU_QUICK","type=f32,ne_a=[128,2,2,2],v=1,split","support","1","yes","CUDA" "CUDA0","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=1,split","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=f32,n=1,m=8,r=2,b=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA" +"CUDA0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=0,alpha=0.500000,limit=2.000000","support","1","yes","CUDA" +"CUDA0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=0,alpha=0.500000,limit=7.000000","support","1","yes","CUDA" +"CUDA0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=0,alpha=1.702000,limit=2.000000","support","1","yes","CUDA" +"CUDA0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=0,alpha=1.702000,limit=7.000000","support","1","yes","CUDA" +"CUDA0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=1,alpha=0.500000,limit=2.000000","support","1","yes","CUDA" +"CUDA0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=1,alpha=0.500000,limit=7.000000","support","1","yes","CUDA" +"CUDA0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=1,alpha=1.702000,limit=2.000000","support","1","yes","CUDA" +"CUDA0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=1,alpha=1.702000,limit=7.000000","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f32,n=76800,m=5,r=4,be1=1,be2=2,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f32,n=256,m=80000,r=70000,be1=2,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,be1=700,be2=100,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q4_0,n=76800,m=5,r=4,be1=1,be2=2,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q4_0,n=256,m=80000,r=70000,be1=2,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,be1=700,be2=100,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f32,n=1,m=8,r=2,be1=1,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=mxfp4,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=mxfp4,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=mxfp4,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=mxfp4,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA" "CUDA0","GET_ROWS_BACK","type=f32,n=1,m=8,r=2,b=1,v=0","support","1","yes","CUDA" "CUDA0","GET_ROWS_BACK","type=f32,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA" "CUDA0","GET_ROWS_BACK","type=f32,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA" @@ -349,6 +407,8 @@ "CUDA0","GET_ROWS_BACK","type=q5_1,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" "CUDA0","GET_ROWS_BACK","type=q8_0,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" "CUDA0","GET_ROWS_BACK","type=q8_0,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS_BACK","type=mxfp4,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS_BACK","type=mxfp4,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" "CUDA0","GET_ROWS_BACK","type=q2_K,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" "CUDA0","GET_ROWS_BACK","type=q2_K,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" "CUDA0","GET_ROWS_BACK","type=q3_K,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" @@ -379,295 +439,309 @@ "CUDA0","GET_ROWS_BACK","type=iq4_xs,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" "CUDA0","GET_ROWS_BACK","type=i32,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" "CUDA0","GET_ROWS_BACK","type=i32,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=f32,ne=[1,8,1,3],nr23=[1,1],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_1,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_1,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_1,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_1,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_1,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_1,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_1,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_1,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_1,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_1,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_1,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_1,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_1,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_1,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_1,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_1,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_1,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_1,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_1,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_1,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_1,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_1,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_1,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_1,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q8_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q8_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q8_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q8_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q8_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q8_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q8_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q8_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q8_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q8_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q8_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q8_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q2_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q2_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q2_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q2_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q2_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q2_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q2_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q2_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q2_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q2_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q2_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q2_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q3_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q3_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q3_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q3_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q3_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q3_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q3_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q3_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q3_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q3_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q3_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q3_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q4_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q4_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q4_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q4_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q4_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q4_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q4_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q4_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q4_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q4_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q4_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q4_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q5_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q5_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q5_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q5_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q5_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q5_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q5_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q5_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q5_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q5_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q5_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q5_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q6_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q6_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q6_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q6_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q6_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q6_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q6_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q6_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q6_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q6_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q6_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q6_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xxs,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xxs,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xxs,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xxs,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xxs,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xxs,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xxs,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xxs,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xxs,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xxs,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xxs,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xxs,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xs,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xs,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xs,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xs,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xs,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xs,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xs,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xs,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xs,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xs,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xs,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xs,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_xxs,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_xxs,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_xxs,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_xxs,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_xxs,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_xxs,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_xxs,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_xxs,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_xxs,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_xxs,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_xxs,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_xxs,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_m,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_m,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_m,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_m,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_m,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_m,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_m,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_m,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_m,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_m,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_m,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_m,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq4_nl,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=iq4_nl,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=iq4_nl,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=iq4_nl,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=iq4_nl,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=iq4_nl,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=iq4_nl,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=iq4_nl,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=iq4_nl,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=iq4_nl,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=iq4_nl,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=iq4_nl,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=iq3_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq4_xs,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq4_xs,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq4_xs,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq4_xs,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq4_xs,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq4_xs,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq4_xs,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq4_xs,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq4_xs,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq4_xs,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq4_xs,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq4_xs,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[1,8,1,3],nr23=[1,1],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,type_idx=i32,ne=[1,8,1,3],nr23=[1,1],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q8_0,type_idx=i32,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" "CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=0,p1=0","support","1","yes","CUDA" "CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=0,p1=1","support","1","yes","CUDA" "CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=1,p1=0","support","1","yes","CUDA" @@ -883,794 +957,3887 @@ "CUDA0","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,1,2560],ne_kernel=[3,3,1,2560],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","yes","CUDA" "CUDA0","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,2,2560],ne_kernel=[3,3,2,2560],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","yes","CUDA" "CUDA0","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[5,5,1,32],ne_kernel=[3,4,1,32],s0=1,s1=1,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f16,dst_type=f32,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" +"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" "CUDA0","CONV_2D_DW","ne_input=[17,34,9,1],ne_kernel=[3,3,1,9],stride=1,padding=0,dilation=1,cwhn=0","support","1","yes","CUDA" "CUDA0","CONV_2D_DW","ne_input=[17,34,9,1],ne_kernel=[3,3,1,9],stride=1,padding=0,dilation=1,cwhn=1","support","1","yes","CUDA" "CUDA0","CONV_2D_DW","ne_input=[32,8,64,1],ne_kernel=[3,3,1,64],stride=2,padding=1,dilation=1,cwhn=0","support","1","yes","CUDA" "CUDA0","CONV_2D_DW","ne_input=[32,8,64,1],ne_kernel=[3,3,1,64],stride=2,padding=1,dilation=1,cwhn=1","support","1","yes","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=4,ID=8,IH=8,IW=8,OC=8,KD=1,KH=1,KW=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" +"CUDA0","CONV_3D","N=1,IC=4,ID=8,IH=8,IW=8,OC=8,KD=1,KH=1,KW=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" "CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,1,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA" "CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,1,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA" "CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,1,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA" @@ -1792,6 +4959,7 @@ "CUDA0","COUNT_EQUAL","type=f32,ne=[4,500,1,1]","support","1","yes","CUDA" "CUDA0","COUNT_EQUAL","type=f32,ne=[4,5000,1,1]","support","1","yes","CUDA" "CUDA0","ARGMAX","type=f32,ne=[32,1,1,1]","support","1","yes","CUDA" +"CUDA0","ARGMAX","type=f32,ne=[32,513,1,1]","support","1","yes","CUDA" "CUDA0","ARGMAX","type=f32,ne=[100,10,1,1]","support","1","yes","CUDA" "CUDA0","ARGMAX","type=f32,ne=[1024,10,1,1]","support","1","yes","CUDA" "CUDA0","ARGMAX","type=f32,ne=[1024,12,1,1]","support","1","yes","CUDA" @@ -1831,12 +4999,12 @@ "CUDA0","DUP","type=f16,ne=[10,10,5,1],permute=[1,0,2,3]","support","1","yes","CUDA" "CUDA0","DUP","type=i16,ne=[10,8,3,1],permute=[0,2,1,3]","support","0","no","CUDA" "CUDA0","DUP","type=i16,ne=[10,8,3,1],permute=[1,2,0,3]","support","0","no","CUDA" -"CUDA0","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=1","support","0","no","CUDA" -"CUDA0","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=2","support","0","no","CUDA" -"CUDA0","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=3","support","0","no","CUDA" -"CUDA0","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=1","support","0","no","CUDA" -"CUDA0","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=2","support","0","no","CUDA" -"CUDA0","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=3","support","0","no","CUDA" +"CUDA0","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=1","support","1","yes","CUDA" +"CUDA0","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=2","support","1","yes","CUDA" +"CUDA0","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=3","support","1","yes","CUDA" +"CUDA0","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=1","support","1","yes","CUDA" +"CUDA0","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=2","support","1","yes","CUDA" +"CUDA0","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=3","support","1","yes","CUDA" "CUDA0","CPY","type_src=f32,type_dst=f32,ne=[1,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA" "CUDA0","CPY","type_src=f32,type_dst=f32,ne=[1,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA" "CUDA0","CPY","type_src=f32,type_dst=f32,ne=[1,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CUDA" @@ -1909,6 +5077,15 @@ "CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA" "CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA" +"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" +"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA" +"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" +"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA" +"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" +"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA" "CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA" "CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA" @@ -2051,6 +5228,8 @@ "CUDA0","CPY","type_src=f16,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=f16,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=f16,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" +"CUDA0","CPY","type_src=f16,type_dst=mxfp4,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" +"CUDA0","CPY","type_src=f16,type_dst=mxfp4,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=f16,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=f16,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=f16,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" @@ -2095,6 +5274,8 @@ "CUDA0","CPY","type_src=bf16,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=bf16,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=bf16,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" +"CUDA0","CPY","type_src=bf16,type_dst=mxfp4,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" +"CUDA0","CPY","type_src=bf16,type_dst=mxfp4,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=bf16,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=bf16,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=bf16,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" @@ -2139,6 +5320,8 @@ "CUDA0","CPY","type_src=f32,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA" "CUDA0","CPY","type_src=f32,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA" "CUDA0","CPY","type_src=f32,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA" +"CUDA0","CPY","type_src=f32,type_dst=mxfp4,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" +"CUDA0","CPY","type_src=f32,type_dst=mxfp4,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=f32,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=f32,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=f32,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" @@ -2183,6 +5366,8 @@ "CUDA0","CPY","type_src=q5_1,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA" "CUDA0","CPY","type_src=q8_0,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA" "CUDA0","CPY","type_src=q8_0,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA" +"CUDA0","CPY","type_src=mxfp4,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" +"CUDA0","CPY","type_src=mxfp4,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=q2_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=q2_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=q3_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" @@ -2215,6 +5400,10 @@ "CUDA0","CPY","type_src=f16,type_dst=f32,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA" "CUDA0","CPY","type_src=f32,type_dst=f16,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA" "CUDA0","CPY","type_src=f32,type_dst=f32,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA" +"CUDA0","CPY","type_src=f32,type_dst=i32,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA" +"CUDA0","CPY","type_src=f32,type_dst=i32,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA" +"CUDA0","CPY","type_src=i32,type_dst=f32,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA" +"CUDA0","CPY","type_src=i32,type_dst=f32,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA" "CUDA0","CONT","type=f32,ne=[10,10,10,1]","support","1","yes","CUDA" "CUDA0","CONT","type=f32,ne=[2,1,1,1]","support","1","yes","CUDA" "CUDA0","CONT","type=f32,ne=[2,1,3,5]","support","1","yes","CUDA" @@ -2225,259 +5414,37 @@ "CUDA0","CONT","type=bf16,ne=[2,1,1,1]","support","1","yes","CUDA" "CUDA0","CONT","type=bf16,ne=[2,1,3,5]","support","1","yes","CUDA" "CUDA0","CONT","type=bf16,ne=[2,3,5,7]","support","1","yes","CUDA" -"CUDA0","ADD","type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f16,ne=[1,1,1,1],nr=[32,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f16,ne=[1,1,1,1],nr=[32,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f16,ne=[1,1,1,1],nr=[32,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f16,ne=[1,1,1,1],nr=[32,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f16,ne=[1,1,320,320],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f16,ne=[1,1,320,320],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f16,ne=[1,1,320,320],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f16,ne=[1,1,320,320],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f16,ne=[10,5,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f16,ne=[10,5,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f16,ne=[10,5,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f16,ne=[10,5,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f16,ne=[10,5,4,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f16,ne=[10,5,4,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f16,ne=[10,5,4,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f16,ne=[10,5,4,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[2,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[2,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[2,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[2,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[1,2,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[1,2,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[1,2,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[1,2,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[1,1,2,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[1,1,2,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[1,1,2,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[1,1,2,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[1,1,1,2],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[1,1,1,2],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[1,1,1,2],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[1,1,1,2],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[1,1,2,2],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[1,1,2,2],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[1,1,2,2],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[1,1,2,2],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[1,2,2,2],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[1,2,2,2],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[1,2,2,2],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[1,2,2,2],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f16,ne=[1280,1,1,1],nr=[1,16,16,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f16,ne=[1280,1,1,1],nr=[1,16,16,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f16,ne=[1280,1,1,1],nr=[1,16,16,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f16,ne=[1280,1,1,1],nr=[1,16,16,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f16,ne=[1280,16,16,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f16,ne=[1280,16,16,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f16,ne=[1280,16,16,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f16,ne=[1280,16,16,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f16,ne=[1280,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f16,ne=[1280,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f16,ne=[1280,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f16,ne=[1280,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f16,ne=[1,1,1280,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f16,ne=[1,1,1280,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f16,ne=[1,1,1280,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f16,ne=[1,1,1280,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f16,ne=[16,16,1280,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f16,ne=[16,16,1280,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f16,ne=[16,16,1280,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f16,ne=[16,16,1280,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f16,ne=[1,1,1920,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f16,ne=[1,1,1920,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f16,ne=[1,1,1920,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f16,ne=[1,1,1920,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f16,ne=[1,1,2560,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f16,ne=[1,1,2560,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f16,ne=[1,1,2560,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f16,ne=[1,1,2560,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f16,ne=[1,1,1280,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f16,ne=[1,1,1280,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f16,ne=[1,1,1280,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f16,ne=[1,1,1280,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f16,ne=[1,1,1920,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f16,ne=[1,1,1920,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f16,ne=[1,1,1920,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f16,ne=[1,1,1920,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f16,ne=[1,1,640,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f16,ne=[1,1,640,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f16,ne=[1,1,640,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f16,ne=[1,1,640,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f16,ne=[5120,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f16,ne=[5120,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f16,ne=[5120,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f16,ne=[5120,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f16,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f16,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f16,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f16,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f32,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f32,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f32,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[1,1,1,1],nr=[32,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f32,ne=[1,1,1,1],nr=[32,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f32,ne=[1,1,1,1],nr=[32,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f32,ne=[1,1,1,1],nr=[32,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[1,1,320,320],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f32,ne=[1,1,320,320],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f32,ne=[1,1,320,320],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f32,ne=[1,1,320,320],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[10,5,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f32,ne=[10,5,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f32,ne=[10,5,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f32,ne=[10,5,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[10,5,4,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f32,ne=[10,5,4,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f32,ne=[10,5,4,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f32,ne=[10,5,4,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[2,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[2,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[2,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[2,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,2,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[1,2,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[1,2,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[1,2,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,2,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[1,1,2,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[1,1,2,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[1,1,2,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,1,2],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[1,1,1,2],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[1,1,1,2],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[1,1,1,2],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,2,2],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[1,1,2,2],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[1,1,2,2],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[1,1,2,2],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,2,2,2],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[1,2,2,2],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[1,2,2,2],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[1,2,2,2],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[1280,1,1,1],nr=[1,16,16,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f32,ne=[1280,1,1,1],nr=[1,16,16,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f32,ne=[1280,1,1,1],nr=[1,16,16,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f32,ne=[1280,1,1,1],nr=[1,16,16,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[1280,16,16,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f32,ne=[1280,16,16,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f32,ne=[1280,16,16,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f32,ne=[1280,16,16,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[1280,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f32,ne=[1280,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f32,ne=[1280,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f32,ne=[1280,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[1,1,1280,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f32,ne=[1,1,1280,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f32,ne=[1,1,1280,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f32,ne=[1,1,1280,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[16,16,1280,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f32,ne=[16,16,1280,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f32,ne=[16,16,1280,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f32,ne=[16,16,1280,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[1,1,1920,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f32,ne=[1,1,1920,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f32,ne=[1,1,1920,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f32,ne=[1,1,1920,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[1,1,2560,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f32,ne=[1,1,2560,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f32,ne=[1,1,2560,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f32,ne=[1,1,2560,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[1,1,1280,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f32,ne=[1,1,1280,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f32,ne=[1,1,1280,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f32,ne=[1,1,1280,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[1,1,1920,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f32,ne=[1,1,1920,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f32,ne=[1,1,1920,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f32,ne=[1,1,1920,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[1,1,640,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f32,ne=[1,1,640,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f32,ne=[1,1,640,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f32,ne=[1,1,640,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[5120,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f32,ne=[5120,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f32,ne=[5120,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f32,ne=[5120,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","SUB","type=f32,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","MUL","type=f32,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","DIV","type=f32,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[2,1,1,1],nf=2","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[16,5,4,3],nr=[1,2,1,1],nf=3","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,2,1],nf=4","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[16,5,4,3],nr=[1,1,1,2],nf=5","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,2,2],nf=6","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,2,2,2],nf=7","support","1","yes","CUDA" -"CUDA0","ADD","type=f32,ne=[16,5,4,3],nr=[2,2,2,2],nf=8","support","1","yes","CUDA" "CUDA0","ADD1","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA" -"CUDA0","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=0.000000","support","1","yes","CUDA" -"CUDA0","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=1.000000","support","1","yes","CUDA" +"CUDA0","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=1.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=1.000000,inplace=1","support","1","yes","CUDA" +"CUDA0","SCALE","type=f32,ne=[100,10,10,10],scale=2.000000,bias=1.000000,inplace=0","support","1","yes","CUDA" "CUDA0","SILU_BACK","type=f32,ne=[64,5,4,3],eps=0.000001","support","1","yes","CUDA" "CUDA0","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000000","support","1","yes","CUDA" -"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000000","support","1","yes","CUDA" +"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000000,inplace=0","support","1","yes","CUDA" "CUDA0","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000000","support","1","yes","CUDA" -"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000000","support","1","yes","CUDA" +"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000000,inplace=0","support","1","yes","CUDA" "CUDA0","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.000000","support","1","yes","CUDA" "CUDA0","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","CUDA" "CUDA0","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000001","support","1","yes","CUDA" -"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000001","support","1","yes","CUDA" +"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000001,inplace=0","support","1","yes","CUDA" "CUDA0","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000001","support","1","yes","CUDA" -"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000001","support","1","yes","CUDA" +"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000001,inplace=0","support","1","yes","CUDA" "CUDA0","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.000001","support","1","yes","CUDA" "CUDA0","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","CUDA" "CUDA0","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000100","support","1","yes","CUDA" -"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000100","support","1","yes","CUDA" +"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000100,inplace=0","support","1","yes","CUDA" "CUDA0","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000100","support","1","yes","CUDA" -"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000100","support","1","yes","CUDA" +"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000100,inplace=0","support","1","yes","CUDA" "CUDA0","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.000100","support","1","yes","CUDA" "CUDA0","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","CUDA" "CUDA0","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.100000","support","1","yes","CUDA" -"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.100000","support","1","yes","CUDA" +"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.100000,inplace=0","support","1","yes","CUDA" "CUDA0","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.100000","support","1","yes","CUDA" -"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.100000","support","1","yes","CUDA" +"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.100000,inplace=0","support","1","yes","CUDA" "CUDA0","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.100000","support","1","yes","CUDA" "CUDA0","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","CUDA" -"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000000,broadcast=0","support","1","yes","CUDA" -"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000000,broadcast=1","support","1","yes","CUDA" -"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000001,broadcast=0","support","1","yes","CUDA" -"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000001,broadcast=1","support","1","yes","CUDA" -"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000100,broadcast=0","support","1","yes","CUDA" -"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000100,broadcast=1","support","1","yes","CUDA" -"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.100000,broadcast=0","support","1","yes","CUDA" -"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.100000,broadcast=1","support","1","yes","CUDA" -"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=1.000000,broadcast=0","support","1","yes","CUDA" -"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=1.000000,broadcast=1","support","1","yes","CUDA" +"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000001,inplace=1","support","1","yes","CUDA" "CUDA0","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","CUDA" "CUDA0","SSM_CONV","type=f32,ne_a=[4,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","CUDA" "CUDA0","SSM_CONV","type=f32,ne_a=[8,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","CUDA" @@ -2512,1243 +5479,1851 @@ "CUDA0","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=1","support","1","yes","CUDA" "CUDA0","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=4","support","1","yes","CUDA" "CUDA0","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=128,n_seqs=4","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=1,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=64,n=2,k=128,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=83,n=2,k=128,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=64,n=2,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=83,n=2,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=64,n=45,k=128,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=45,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=193,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=67,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=0,m=32,n=1024,k=16","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=1,m=32,n=1024,k=16","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q5_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q5_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q5_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q5_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q2_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q2_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q3_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q3_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q5_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q5_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q6_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q6_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq3_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq3_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq1_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq1_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq1_m,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq1_m,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq4_nl,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq4_nl,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq3_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq3_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq4_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq4_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=bf16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=bf16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=1,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=64,n=2,k=128,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=83,n=2,k=128,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=64,n=2,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=83,n=2,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=64,n=45,k=128,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=45,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=193,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=67,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=64,n=77,k=77,bs=[12,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=0,m=32,n=1024,k=16,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=2,n_used=2,b=0,m=32,n=8192,k=64,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=0,m=50,n=200,k=64,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=1,m=32,n=1024,k=16,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=2,n_used=2,b=1,m=32,n=8192,k=64,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=1,m=50,n=200,k=64,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=1,n_used=1,b=0,m=8,n=16,k=1,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=32,n_used=2,b=0,m=2880,n=32,k=2880,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q5_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q5_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q5_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q5_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q2_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q2_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q3_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q3_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q5_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q5_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q6_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q6_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq3_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq3_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq1_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq1_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq1_m,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq1_m,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq4_nl,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq4_nl,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq3_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq3_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq4_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq4_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=bf16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=bf16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" "CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","1","yes","CUDA" "CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","1","yes","CUDA" "CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","1","yes","CUDA" @@ -4517,6 +8092,134 @@ "CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA" "CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA" "CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA" +"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA" "CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA" "CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA" "CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA" @@ -4645,548 +8348,796 @@ "CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA" "CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA" "CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=1,n_token=1","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=1,n_token=32","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=1,n_token=129","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=1,n_token=1","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=1,n_token=32","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=1,n_token=129","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=2,n_token=1","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=2,n_token=32","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=2,n_token=129","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=2,n_token=1","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=2,n_token=32","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=2,n_token=129","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=4,n_token=1","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=4,n_token=32","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=4,n_token=129","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=4,n_token=1","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=4,n_token=32","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=4,n_token=129","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=1,n_token=1","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=1,n_token=32","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=1,n_token=129","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=1,n_token=1","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=1,n_token=32","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=1,n_token=129","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=2,n_token=1","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=2,n_token=32","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=2,n_token=129","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=2,n_token=1","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=2,n_token=32","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=2,n_token=129","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=4,n_token=1","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=4,n_token=32","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=4,n_token=129","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=4,n_token=1","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=4,n_token=32","support","1","yes","CUDA" +"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=4,n_token=129","support","1","yes","CUDA" "CUDA0","SQR","type=f16,ne=[10,5,4,3]","support","1","yes","CUDA" "CUDA0","SQRT","type=f16,ne=[10,3,3,2]","support","1","yes","CUDA" "CUDA0","LOG","type=f16,ne=[10,5,4,3]","support","1","yes","CUDA" "CUDA0","SIN","type=f16,ne=[10,2,2,2]","support","1","yes","CUDA" "CUDA0","COS","type=f16,ne=[10,2,2,2]","support","1","yes","CUDA" "CUDA0","CLAMP","type=f16,ne=[10,5,4,3],min=-0.500000,max=0.500000","support","1","yes","CUDA" +"CUDA0","LEAKY_RELU","type=f16,ne_a=[10,5,4,3],negative_slope=0.100000","support","1","yes","CUDA" +"CUDA0","FLOOR","type=f16,ne=[10,2,2,2]","support","1","yes","CUDA" +"CUDA0","CEIL","type=f16,ne=[10,2,2,2]","support","1","yes","CUDA" +"CUDA0","ROUND","type=f16,ne=[10,2,2,2]","support","1","yes","CUDA" +"CUDA0","TRUNC","type=f16,ne=[10,2,2,2]","support","1","yes","CUDA" +"CUDA0","SQR","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA" +"CUDA0","SQRT","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA" +"CUDA0","LOG","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA" +"CUDA0","SIN","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA" +"CUDA0","COS","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA" +"CUDA0","CLAMP","type=f16,ne=[7,1,5,3],min=-0.500000,max=0.500000","support","1","yes","CUDA" +"CUDA0","LEAKY_RELU","type=f16,ne_a=[7,1,5,3],negative_slope=0.100000","support","1","yes","CUDA" +"CUDA0","FLOOR","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA" +"CUDA0","CEIL","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA" +"CUDA0","ROUND","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA" +"CUDA0","TRUNC","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA" "CUDA0","SQR","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA" "CUDA0","SQRT","type=f32,ne=[10,3,3,2]","support","1","yes","CUDA" "CUDA0","LOG","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA" "CUDA0","SIN","type=f32,ne=[10,2,2,2]","support","1","yes","CUDA" "CUDA0","COS","type=f32,ne=[10,2,2,2]","support","1","yes","CUDA" "CUDA0","CLAMP","type=f32,ne=[10,5,4,3],min=-0.500000,max=0.500000","support","1","yes","CUDA" +"CUDA0","LEAKY_RELU","type=f32,ne_a=[10,5,4,3],negative_slope=0.100000","support","1","yes","CUDA" +"CUDA0","FLOOR","type=f32,ne=[10,2,2,2]","support","1","yes","CUDA" +"CUDA0","CEIL","type=f32,ne=[10,2,2,2]","support","1","yes","CUDA" +"CUDA0","ROUND","type=f32,ne=[10,2,2,2]","support","1","yes","CUDA" +"CUDA0","TRUNC","type=f32,ne=[10,2,2,2]","support","1","yes","CUDA" +"CUDA0","SQR","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA" +"CUDA0","SQRT","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA" +"CUDA0","LOG","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA" +"CUDA0","SIN","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA" +"CUDA0","COS","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA" +"CUDA0","CLAMP","type=f32,ne=[7,1,5,3],min=-0.500000,max=0.500000","support","1","yes","CUDA" +"CUDA0","LEAKY_RELU","type=f32,ne_a=[7,1,5,3],negative_slope=0.100000","support","1","yes","CUDA" +"CUDA0","FLOOR","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA" +"CUDA0","CEIL","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA" +"CUDA0","ROUND","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA" +"CUDA0","TRUNC","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA" "CUDA0","DIAG_MASK_INF","type=f32,ne=[10,10,1,1],n_past=5","support","1","yes","CUDA" "CUDA0","DIAG_MASK_INF","type=f32,ne=[10,10,3,1],n_past=5","support","1","yes","CUDA" "CUDA0","DIAG_MASK_INF","type=f32,ne=[10,10,3,2],n_past=5","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=1","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=0,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=1","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=1","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=0,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=1","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=1","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=1","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=1","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=1","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" +"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,2,3],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" +"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,2,3],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" +"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,2,3],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" +"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,2,3],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" +"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,2,3],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" +"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,2,3],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" +"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,2,3],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,2,3],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=24,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=24,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=24,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=24,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CUDA" "CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=0","support","1","yes","CUDA" "CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=0","support","0","no","CUDA" "CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=0","support","1","yes","CUDA" @@ -5222,9 +9173,15 @@ "CUDA0","ARGSORT","type=f32,ne=[8,1,1,1],order=0","support","1","yes","CUDA" "CUDA0","ARGSORT","type=f32,ne=[16,10,10,10],order=0","support","1","yes","CUDA" "CUDA0","ARGSORT","type=f32,ne=[60,10,10,10],order=0","support","1","yes","CUDA" +"CUDA0","ARGSORT","type=f32,ne=[1024,1,1,1],order=0","support","1","yes","CUDA" +"CUDA0","ARGSORT","type=f32,ne=[16384,1,1,1],order=0","support","1","yes","CUDA" +"CUDA0","ARGSORT","type=f32,ne=[2,8,8192,1],order=0","support","1","yes","CUDA" "CUDA0","ARGSORT","type=f32,ne=[8,1,1,1],order=1","support","1","yes","CUDA" "CUDA0","ARGSORT","type=f32,ne=[16,10,10,10],order=1","support","1","yes","CUDA" "CUDA0","ARGSORT","type=f32,ne=[60,10,10,10],order=1","support","1","yes","CUDA" +"CUDA0","ARGSORT","type=f32,ne=[1024,1,1,1],order=1","support","1","yes","CUDA" +"CUDA0","ARGSORT","type=f32,ne=[16384,1,1,1],order=1","support","1","yes","CUDA" +"CUDA0","ARGSORT","type=f32,ne=[2,8,8192,1],order=1","support","1","yes","CUDA" "CUDA0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=0","support","1","yes","CUDA" "CUDA0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=1","support","1","yes","CUDA" "CUDA0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=0","support","1","yes","CUDA" @@ -5234,2116 +9191,8845 @@ "CUDA0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=1","support","1","yes","CUDA" "CUDA0","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=1","support","1","yes","CUDA" "CUDA0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=257","support","1","yes","CUDA" +"CUDA0","UPSCALE","type=f32,ne=[1,4,3,2],ne_tgt=[2,8,3,2],mode=257","support","1","yes","CUDA" +"CUDA0","UPSCALE","type=f32,ne=[4,1,3,2],ne_tgt=[1,1,3,2],mode=257","support","1","yes","CUDA" "CUDA0","SUM","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA" -"CUDA0","SUM_ROWS","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA" +"CUDA0","SUM_ROWS","type=f32,ne=[10,5,4,3],permute=0,slice=0","support","1","yes","CUDA" +"CUDA0","SUM","type=f32,ne=[11,5,6,3],permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","SUM","type=f32,ne=[11,5,6,3],permute=[0,3,2,1]","support","1","yes","CUDA" +"CUDA0","SUM","type=f32,ne=[11,5,6,3],permute=[0,1,3,2]","support","1","yes","CUDA" +"CUDA0","SUM_ROWS","type=f32,ne=[11,5,6,3],permute=1,slice=0","support","0","no","CUDA" +"CUDA0","SUM_ROWS","type=f32,ne=[11,5,6,3],permute=0,slice=1","support","0","no","CUDA" +"CUDA0","SUM_ROWS","type=f32,ne=[11,5,6,3],permute=1,slice=1","support","0","no","CUDA" "CUDA0","MEAN","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA" +"CUDA0","SUM","type=f32,ne=[33,1,1,1]","support","1","yes","CUDA" +"CUDA0","SUM_ROWS","type=f32,ne=[33,1,1,1],permute=0,slice=0","support","1","yes","CUDA" +"CUDA0","MEAN","type=f32,ne=[33,1,1,1]","support","1","yes","CUDA" +"CUDA0","SUM","type=f32,ne=[33,1024,1,1]","support","1","yes","CUDA" +"CUDA0","SUM_ROWS","type=f32,ne=[33,1024,1,1],permute=0,slice=0","support","1","yes","CUDA" +"CUDA0","SUM","type=f32,ne=[33,256,1,1]","support","1","yes","CUDA" +"CUDA0","SUM","type=f32,ne=[33,256,1,1],permute=[1,0,2,3]","support","0","no","CUDA" +"CUDA0","SUM_ROWS","type=f32,ne=[33,256,1,1],permute=0,slice=0","support","1","yes","CUDA" +"CUDA0","MEAN","type=f32,ne=[33,256,1,1]","support","1","yes","CUDA" +"CUDA0","MEAN","type=f32,ne=[32769,1,1,1]","support","1","yes","CUDA" "CUDA0","GROUP_NORM","type=f32,ne=[64,64,320,1],num_groups=32,eps=0.000001","support","1","yes","CUDA" "CUDA0","GROUP_NORM","type=f32,ne=[9,9,1280,1],num_groups=32,eps=0.000001","support","1","yes","CUDA" "CUDA0","ACC","type=f32,ne_a=[256,17,1,1],ne_b=[256,16,1,1]","support","1","yes","CUDA" "CUDA0","PAD","type=f32,ne_a=[512,512,1,1],pad_0=1,pad_1=1","support","1","yes","CUDA" -"CUDA0","PAD_REFLECT_1D","type=f32,ne_a=[512,34,2,1],pad_0=10,pad_1=9","support","0","no","CUDA" -"CUDA0","ROLL","shift0=3,shift1=-2,shift3=1,shift4=-1","support","0","no","CUDA" +"CUDA0","PAD","type=f32,ne_a=[512,512,3,1],lp0=1,rp0=1,lp1=1,rp1=1,lp2=1,rp2=1,lp3=1,rp3=1,v=0","support","1","yes","CUDA" +"CUDA0","PAD_REFLECT_1D","type=f32,ne_a=[512,34,2,1],pad_0=10,pad_1=9","support","1","yes","CUDA" +"CUDA0","PAD_REFLECT_1D","type=f32,ne_a=[3000,384,4,1],pad_0=10,pad_1=9","support","1","yes","CUDA" +"CUDA0","ROLL","shift0=3,shift1=-2,shift3=1,shift4=-1","support","1","yes","CUDA" "CUDA0","ARANGE","type=f32,start=0.000000,stop=10.000000,step=1.000000","support","1","yes","CUDA" "CUDA0","TIMESTEP_EMBEDDING","type=f32,ne_a=[2,1,1,1],dim=320,max_period=10000","support","1","yes","CUDA" "CUDA0","LEAKY_RELU","type=f32,ne_a=[10,5,4,3],negative_slope=0.100000","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0","support","1","yes","CUDA" +"CUDA0","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0","support","1","yes","CUDA" +"CUDA0","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1","support","0","no","CUDA" +"CUDA0","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=1","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" "CUDA0","CROSS_ENTROPY_LOSS","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA" "CUDA0","CROSS_ENTROPY_LOSS","type=f32,ne=[30000,1,1,1]","support","1","yes","CUDA" "CUDA0","CROSS_ENTROPY_LOSS_BACK","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA" "CUDA0","CROSS_ENTROPY_LOSS_BACK","type=f32,ne=[30000,1,1,1]","support","1","yes","CUDA" "CUDA0","OPT_STEP_ADAMW","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA" +"CUDA0","OPT_STEP_SGD","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA" From 820b62814ce5eb22977b1c67e040362da6a20529 Mon Sep 17 00:00:00 2001 From: mnehete32 Date: Tue, 4 Nov 2025 23:15:23 +0530 Subject: [PATCH 4/5] docs: only updating ops added by me --- docs/ops.md | 40 +- docs/ops/CUDA.csv | 21162 +++++++++++--------------------------------- 2 files changed, 5258 insertions(+), 15944 deletions(-) diff --git a/docs/ops.md b/docs/ops.md index cc89c7aa8b36d..3738a48072832 100644 --- a/docs/ops.md +++ b/docs/ops.md @@ -16,17 +16,17 @@ Legend: |-----------|------|------|------|------|------|------|------|------|------| | ABS | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ | | ACC | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | -| ADD | ❌ | ✅ | ✅ | ❌ | 🟡 | 🟡 | ✅ | ✅ | ❌ | +| ADD | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ | | ADD1 | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | -| ADD_ID | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | +| ADD_ID | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | | ARANGE | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | | ARGMAX | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | ARGSORT | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | -| CEIL | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | ✅ | ❌ | ❌ | +| CEIL | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | | CLAMP | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | 🟡 | ❌ | | CONCAT | ❌ | ✅ | ✅ | 🟡 | ✅ | 🟡 | 🟡 | ✅ | ❌ | | CONT | ❌ | 🟡 | ✅ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | ❌ | -| CONV_2D | ❌ | ❌ | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ | ❌ | +| CONV_2D | ❌ | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | ✅ | ❌ | | CONV_2D_DW | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | | CONV_3D | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | | CONV_TRANSPOSE_1D | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | @@ -37,12 +37,12 @@ Legend: | CROSS_ENTROPY_LOSS | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | | CROSS_ENTROPY_LOSS_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | | DIAG_MASK_INF | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ | -| DIV | ❌ | ✅ | ✅ | ❌ | 🟡 | 🟡 | ✅ | ✅ | ❌ | +| DIV | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ | | DUP | ❌ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | ✅ | 🟡 | ❌ | | ELU | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ | | EXP | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ | | FLASH_ATTN_EXT | ❌ | 🟡 | ✅ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ❌ | -| FLOOR | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | ✅ | ❌ | ❌ | +| FLOOR | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | | GATED_LINEAR_ATTN | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | | GEGLU | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ | | GEGLU_ERF | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ | @@ -57,22 +57,22 @@ Legend: | HARDSIGMOID | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ | | HARDSWISH | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ | | IM2COL | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | ✅ | ❌ | -| IM2COL_3D | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | +| IM2COL_3D | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | | L2_NORM | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | LEAKY_RELU | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | LOG | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | | MEAN | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | -| MUL | ❌ | ✅ | ✅ | ❌ | 🟡 | 🟡 | ✅ | ✅ | ❌ | +| MUL | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ | | MUL_MAT | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | | MUL_MAT_ID | ❌ | 🟡 | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ❌ | | NEG | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ | | NORM | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ | | NORM_MUL_ADD | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | | OPT_STEP_ADAMW | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | -| OPT_STEP_SGD | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | +| OPT_STEP_SGD | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | | OUT_PROD | 🟡 | ❌ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ❌ | ❌ | -| PAD | ❌ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ✅ | ❌ | -| PAD_REFLECT_1D | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | +| PAD | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | 🟡 | ✅ | ❌ | +| PAD_REFLECT_1D | ❌ | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ | | POOL_2D | ❌ | 🟡 | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | REGLU | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ | | RELU | ❌ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | ❌ | @@ -80,15 +80,15 @@ Legend: | REPEAT_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | | RMS_NORM | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | ✅ | ❌ | | RMS_NORM_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | -| RMS_NORM_MUL_ADD | ❌ | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | -| ROLL | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | +| RMS_NORM_MUL_ADD | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | +| ROLL | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | | ROPE | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | | ROPE_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | -| ROUND | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | ✅ | ❌ | ❌ | +| ROUND | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | | RWKV_WKV6 | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | RWKV_WKV7 | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | SCALE | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | -| SET | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | +| SET | ❌ | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | | SET_ROWS | ❌ | ❌ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | ❌ | | SGN | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ | | SIGMOID | ❌ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | ❌ | @@ -103,14 +103,14 @@ Legend: | SSM_CONV | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | | SSM_SCAN | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | | STEP | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ | -| SUB | ❌ | ✅ | ✅ | ❌ | 🟡 | 🟡 | ✅ | ✅ | ❌ | -| SUM | ❌ | ✅ | ✅ | 🟡 | ❌ | ❌ | ✅ | ✅ | ❌ | -| SUM_ROWS | ❌ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ✅ | ❌ | +| SUB | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ | +| SUM | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | +| SUM_ROWS | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | 🟡 | ✅ | ❌ | | SWIGLU | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ | -| SWIGLU_OAI | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | +| SWIGLU_OAI | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | | TANH | ❌ | ✅ | ✅ | 🟡 | 🟡 | ✅ | 🟡 | 🟡 | ❌ | | TIMESTEP_EMBEDDING | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | | TOPK_MOE | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | -| TRUNC | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | ✅ | ❌ | ❌ | +| TRUNC | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | | UPSCALE | ❌ | 🟡 | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | ❌ | | XIELU | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | diff --git a/docs/ops/CUDA.csv b/docs/ops/CUDA.csv index 8eb31f22d2c08..71e47977e31d1 100644 --- a/docs/ops/CUDA.csv +++ b/docs/ops/CUDA.csv @@ -29,16 +29,6 @@ "CUDA0","EXP","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" "CUDA0","GELU_ERF","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" "CUDA0","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" -"CUDA0","XIELU","type=f16,ne_a=[128,2,2,2],v=0","support","0","no","CUDA" -"CUDA0","XIELU","type=f16,ne_a=[5,7,11,13],v=0","support","0","no","CUDA" -"CUDA0","FLOOR","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" -"CUDA0","FLOOR","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" -"CUDA0","CEIL","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" -"CUDA0","CEIL","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" -"CUDA0","ROUND","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" -"CUDA0","ROUND","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" -"CUDA0","TRUNC","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" -"CUDA0","TRUNC","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" "CUDA0","ABS","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" "CUDA0","ABS","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" "CUDA0","SGN","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" @@ -69,16 +59,6 @@ "CUDA0","EXP","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" "CUDA0","GELU_ERF","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" "CUDA0","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" -"CUDA0","XIELU","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" -"CUDA0","XIELU","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" -"CUDA0","FLOOR","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" -"CUDA0","FLOOR","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" -"CUDA0","CEIL","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" -"CUDA0","CEIL","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" -"CUDA0","ROUND","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" -"CUDA0","ROUND","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" -"CUDA0","TRUNC","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" -"CUDA0","TRUNC","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" "CUDA0","ABS","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" "CUDA0","ABS","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" "CUDA0","SGN","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" @@ -109,16 +89,6 @@ "CUDA0","EXP","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" "CUDA0","GELU_ERF","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" "CUDA0","GELU_ERF","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" -"CUDA0","XIELU","type=f32,ne_a=[128,2,2,2],v=0","support","0","no","CUDA" -"CUDA0","XIELU","type=f32,ne_a=[5,7,11,13],v=0","support","0","no","CUDA" -"CUDA0","FLOOR","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" -"CUDA0","FLOOR","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" -"CUDA0","CEIL","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" -"CUDA0","CEIL","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" -"CUDA0","ROUND","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" -"CUDA0","ROUND","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" -"CUDA0","TRUNC","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" -"CUDA0","TRUNC","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" "CUDA0","ABS","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" "CUDA0","ABS","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" "CUDA0","SGN","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" @@ -149,16 +119,6 @@ "CUDA0","EXP","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" "CUDA0","GELU_ERF","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" "CUDA0","GELU_ERF","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" -"CUDA0","XIELU","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" -"CUDA0","XIELU","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" -"CUDA0","FLOOR","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" -"CUDA0","FLOOR","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" -"CUDA0","CEIL","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" -"CUDA0","CEIL","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" -"CUDA0","ROUND","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" -"CUDA0","ROUND","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" -"CUDA0","TRUNC","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" -"CUDA0","TRUNC","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" "CUDA0","REGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=0","support","1","yes","CUDA" "CUDA0","REGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=0","support","1","yes","CUDA" "CUDA0","REGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=1","support","1","yes","CUDA" @@ -279,117 +239,99 @@ "CUDA0","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=1,swapped=1","support","1","yes","CUDA" "CUDA0","GEGLU_QUICK","type=f32,ne_a=[128,2,2,2],v=1,split","support","1","yes","CUDA" "CUDA0","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=1,split","support","1","yes","CUDA" -"CUDA0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=0,alpha=0.500000,limit=2.000000","support","1","yes","CUDA" -"CUDA0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=0,alpha=0.500000,limit=7.000000","support","1","yes","CUDA" -"CUDA0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=0,alpha=1.702000,limit=2.000000","support","1","yes","CUDA" -"CUDA0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=0,alpha=1.702000,limit=7.000000","support","1","yes","CUDA" -"CUDA0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=1,alpha=0.500000,limit=2.000000","support","1","yes","CUDA" -"CUDA0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=1,alpha=0.500000,limit=7.000000","support","1","yes","CUDA" -"CUDA0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=1,alpha=1.702000,limit=2.000000","support","1","yes","CUDA" -"CUDA0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=1,alpha=1.702000,limit=7.000000","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=f32,n=76800,m=5,r=4,be1=1,be2=2,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=f32,n=256,m=80000,r=70000,be1=2,be2=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,be1=700,be2=100,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q4_0,n=76800,m=5,r=4,be1=1,be2=2,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q4_0,n=256,m=80000,r=70000,be1=2,be2=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,be1=700,be2=100,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=f32,n=1,m=8,r=2,be1=1,be2=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=mxfp4,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=mxfp4,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=mxfp4,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=mxfp4,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA" -"CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f32,n=1,m=8,r=2,b=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA" +"CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA" +"CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA" "CUDA0","GET_ROWS_BACK","type=f32,n=1,m=8,r=2,b=1,v=0","support","1","yes","CUDA" "CUDA0","GET_ROWS_BACK","type=f32,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA" "CUDA0","GET_ROWS_BACK","type=f32,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA" @@ -407,8 +349,6 @@ "CUDA0","GET_ROWS_BACK","type=q5_1,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" "CUDA0","GET_ROWS_BACK","type=q8_0,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" "CUDA0","GET_ROWS_BACK","type=q8_0,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" -"CUDA0","GET_ROWS_BACK","type=mxfp4,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" -"CUDA0","GET_ROWS_BACK","type=mxfp4,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" "CUDA0","GET_ROWS_BACK","type=q2_K,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" "CUDA0","GET_ROWS_BACK","type=q2_K,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" "CUDA0","GET_ROWS_BACK","type=q3_K,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" @@ -439,309 +379,295 @@ "CUDA0","GET_ROWS_BACK","type=iq4_xs,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" "CUDA0","GET_ROWS_BACK","type=i32,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA" "CUDA0","GET_ROWS_BACK","type=i32,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[1,8,1,3],nr23=[1,1],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,type_idx=i32,ne=[1,8,1,3],nr23=[1,1],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q8_0,type_idx=i32,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" -"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" -"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=f32,ne=[1,8,1,3],nr23=[1,1],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f32,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=f16,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=bf16,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_1,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_1,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_1,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_1,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_1,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_1,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_1,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_1,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_1,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_1,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_1,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q4_1,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_1,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_1,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_1,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_1,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_1,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_1,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_1,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_1,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_1,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_1,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_1,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q5_1,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q8_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q8_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q8_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q8_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q8_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q8_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q8_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q8_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q8_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q8_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q8_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q8_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=q2_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q2_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q2_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q2_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q2_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q2_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q2_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q2_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q2_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q2_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q2_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q2_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q3_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q3_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q3_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q3_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q3_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q3_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q3_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q3_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q3_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q3_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q3_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q3_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q4_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q4_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q4_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q4_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q4_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q4_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q4_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q4_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q4_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q4_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q4_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q4_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q5_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q5_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q5_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q5_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q5_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q5_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q5_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q5_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q5_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q5_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q5_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q5_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q6_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q6_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q6_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q6_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q6_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q6_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q6_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q6_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q6_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q6_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q6_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=q6_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xxs,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xxs,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xxs,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xxs,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xxs,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xxs,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xxs,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xxs,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xxs,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xxs,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xxs,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xxs,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xs,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xs,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xs,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xs,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xs,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xs,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xs,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xs,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xs,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xs,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xs,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_xs,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq2_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_xxs,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_xxs,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_xxs,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_xxs,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_xxs,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_xxs,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_xxs,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_xxs,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_xxs,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_xxs,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_xxs,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_xxs,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_m,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_m,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_m,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_m,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_m,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_m,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_m,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_m,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_m,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_m,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_m,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq1_m,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq4_nl,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=iq4_nl,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=iq4_nl,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=iq4_nl,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=iq4_nl,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=iq4_nl,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=iq4_nl,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=iq4_nl,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=iq4_nl,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=iq4_nl,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=iq4_nl,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=iq4_nl,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA" +"CUDA0","SET_ROWS","type=iq3_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq3_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq4_xs,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq4_xs,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq4_xs,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq4_xs,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq4_xs,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq4_xs,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq4_xs,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq4_xs,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq4_xs,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq4_xs,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq4_xs,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA" +"CUDA0","SET_ROWS","type=iq4_xs,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA" "CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=0,p1=0","support","1","yes","CUDA" "CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=0,p1=1","support","1","yes","CUDA" "CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=1,p1=0","support","1","yes","CUDA" @@ -957,3887 +883,794 @@ "CUDA0","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,1,2560],ne_kernel=[3,3,1,2560],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","yes","CUDA" "CUDA0","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,2,2560],ne_kernel=[3,3,2,2560],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","yes","CUDA" "CUDA0","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[5,5,1,32],ne_kernel=[3,4,1,32],s0=1,s1=1,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f16,dst_type=f32,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA" -"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" -"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA" "CUDA0","CONV_2D_DW","ne_input=[17,34,9,1],ne_kernel=[3,3,1,9],stride=1,padding=0,dilation=1,cwhn=0","support","1","yes","CUDA" "CUDA0","CONV_2D_DW","ne_input=[17,34,9,1],ne_kernel=[3,3,1,9],stride=1,padding=0,dilation=1,cwhn=1","support","1","yes","CUDA" "CUDA0","CONV_2D_DW","ne_input=[32,8,64,1],ne_kernel=[3,3,1,64],stride=2,padding=1,dilation=1,cwhn=0","support","1","yes","CUDA" "CUDA0","CONV_2D_DW","ne_input=[32,8,64,1],ne_kernel=[3,3,1,64],stride=2,padding=1,dilation=1,cwhn=1","support","1","yes","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=4,ID=8,IH=8,IW=8,OC=8,KD=1,KH=1,KW=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA" -"CUDA0","CONV_3D","N=1,IC=4,ID=8,IH=8,IW=8,OC=8,KD=1,KH=1,KW=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA" "CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,1,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA" "CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,1,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA" "CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,1,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA" @@ -4959,7 +1792,6 @@ "CUDA0","COUNT_EQUAL","type=f32,ne=[4,500,1,1]","support","1","yes","CUDA" "CUDA0","COUNT_EQUAL","type=f32,ne=[4,5000,1,1]","support","1","yes","CUDA" "CUDA0","ARGMAX","type=f32,ne=[32,1,1,1]","support","1","yes","CUDA" -"CUDA0","ARGMAX","type=f32,ne=[32,513,1,1]","support","1","yes","CUDA" "CUDA0","ARGMAX","type=f32,ne=[100,10,1,1]","support","1","yes","CUDA" "CUDA0","ARGMAX","type=f32,ne=[1024,10,1,1]","support","1","yes","CUDA" "CUDA0","ARGMAX","type=f32,ne=[1024,12,1,1]","support","1","yes","CUDA" @@ -4999,12 +1831,12 @@ "CUDA0","DUP","type=f16,ne=[10,10,5,1],permute=[1,0,2,3]","support","1","yes","CUDA" "CUDA0","DUP","type=i16,ne=[10,8,3,1],permute=[0,2,1,3]","support","0","no","CUDA" "CUDA0","DUP","type=i16,ne=[10,8,3,1],permute=[1,2,0,3]","support","0","no","CUDA" -"CUDA0","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=1","support","1","yes","CUDA" -"CUDA0","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=2","support","1","yes","CUDA" -"CUDA0","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=3","support","1","yes","CUDA" -"CUDA0","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=1","support","1","yes","CUDA" -"CUDA0","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=2","support","1","yes","CUDA" -"CUDA0","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=3","support","1","yes","CUDA" +"CUDA0","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=1","support","0","no","CUDA" +"CUDA0","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=2","support","0","no","CUDA" +"CUDA0","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=3","support","0","no","CUDA" +"CUDA0","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=1","support","0","no","CUDA" +"CUDA0","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=2","support","0","no","CUDA" +"CUDA0","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=3","support","0","no","CUDA" "CUDA0","CPY","type_src=f32,type_dst=f32,ne=[1,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA" "CUDA0","CPY","type_src=f32,type_dst=f32,ne=[1,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA" "CUDA0","CPY","type_src=f32,type_dst=f32,ne=[1,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CUDA" @@ -5077,15 +1909,6 @@ "CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA" "CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA" -"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" -"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA" -"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" -"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA" -"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" -"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA" "CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA" "CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA" @@ -5228,8 +2051,6 @@ "CUDA0","CPY","type_src=f16,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=f16,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=f16,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" -"CUDA0","CPY","type_src=f16,type_dst=mxfp4,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" -"CUDA0","CPY","type_src=f16,type_dst=mxfp4,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=f16,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=f16,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=f16,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" @@ -5274,8 +2095,6 @@ "CUDA0","CPY","type_src=bf16,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=bf16,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=bf16,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" -"CUDA0","CPY","type_src=bf16,type_dst=mxfp4,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" -"CUDA0","CPY","type_src=bf16,type_dst=mxfp4,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=bf16,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=bf16,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=bf16,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" @@ -5320,8 +2139,6 @@ "CUDA0","CPY","type_src=f32,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA" "CUDA0","CPY","type_src=f32,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA" "CUDA0","CPY","type_src=f32,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA" -"CUDA0","CPY","type_src=f32,type_dst=mxfp4,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" -"CUDA0","CPY","type_src=f32,type_dst=mxfp4,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=f32,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=f32,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=f32,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" @@ -5366,8 +2183,6 @@ "CUDA0","CPY","type_src=q5_1,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA" "CUDA0","CPY","type_src=q8_0,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA" "CUDA0","CPY","type_src=q8_0,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA" -"CUDA0","CPY","type_src=mxfp4,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" -"CUDA0","CPY","type_src=mxfp4,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=q2_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=q2_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA" "CUDA0","CPY","type_src=q3_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA" @@ -5400,10 +2215,6 @@ "CUDA0","CPY","type_src=f16,type_dst=f32,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA" "CUDA0","CPY","type_src=f32,type_dst=f16,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA" "CUDA0","CPY","type_src=f32,type_dst=f32,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA" -"CUDA0","CPY","type_src=f32,type_dst=i32,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA" -"CUDA0","CPY","type_src=f32,type_dst=i32,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA" -"CUDA0","CPY","type_src=i32,type_dst=f32,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA" -"CUDA0","CPY","type_src=i32,type_dst=f32,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA" "CUDA0","CONT","type=f32,ne=[10,10,10,1]","support","1","yes","CUDA" "CUDA0","CONT","type=f32,ne=[2,1,1,1]","support","1","yes","CUDA" "CUDA0","CONT","type=f32,ne=[2,1,3,5]","support","1","yes","CUDA" @@ -5414,37 +2225,259 @@ "CUDA0","CONT","type=bf16,ne=[2,1,1,1]","support","1","yes","CUDA" "CUDA0","CONT","type=bf16,ne=[2,1,3,5]","support","1","yes","CUDA" "CUDA0","CONT","type=bf16,ne=[2,3,5,7]","support","1","yes","CUDA" +"CUDA0","ADD","type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f16,ne=[1,1,1,1],nr=[32,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f16,ne=[1,1,1,1],nr=[32,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f16,ne=[1,1,1,1],nr=[32,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f16,ne=[1,1,1,1],nr=[32,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f16,ne=[1,1,320,320],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f16,ne=[1,1,320,320],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f16,ne=[1,1,320,320],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f16,ne=[1,1,320,320],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f16,ne=[10,5,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f16,ne=[10,5,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f16,ne=[10,5,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f16,ne=[10,5,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f16,ne=[10,5,4,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f16,ne=[10,5,4,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f16,ne=[10,5,4,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f16,ne=[10,5,4,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[2,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[2,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[2,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[2,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[1,2,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[1,2,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[1,2,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[1,2,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[1,1,2,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[1,1,2,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[1,1,2,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[1,1,2,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[1,1,1,2],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[1,1,1,2],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[1,1,1,2],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[1,1,1,2],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[1,1,2,2],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[1,1,2,2],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[1,1,2,2],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[1,1,2,2],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[1,2,2,2],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[1,2,2,2],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[1,2,2,2],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[1,2,2,2],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f16,ne=[1280,1,1,1],nr=[1,16,16,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f16,ne=[1280,1,1,1],nr=[1,16,16,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f16,ne=[1280,1,1,1],nr=[1,16,16,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f16,ne=[1280,1,1,1],nr=[1,16,16,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f16,ne=[1280,16,16,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f16,ne=[1280,16,16,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f16,ne=[1280,16,16,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f16,ne=[1280,16,16,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f16,ne=[1280,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f16,ne=[1280,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f16,ne=[1280,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f16,ne=[1280,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f16,ne=[1,1,1280,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f16,ne=[1,1,1280,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f16,ne=[1,1,1280,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f16,ne=[1,1,1280,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f16,ne=[16,16,1280,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f16,ne=[16,16,1280,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f16,ne=[16,16,1280,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f16,ne=[16,16,1280,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f16,ne=[1,1,1920,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f16,ne=[1,1,1920,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f16,ne=[1,1,1920,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f16,ne=[1,1,1920,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f16,ne=[1,1,2560,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f16,ne=[1,1,2560,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f16,ne=[1,1,2560,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f16,ne=[1,1,2560,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f16,ne=[1,1,1280,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f16,ne=[1,1,1280,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f16,ne=[1,1,1280,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f16,ne=[1,1,1280,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f16,ne=[1,1,1920,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f16,ne=[1,1,1920,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f16,ne=[1,1,1920,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f16,ne=[1,1,1920,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f16,ne=[1,1,640,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f16,ne=[1,1,640,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f16,ne=[1,1,640,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f16,ne=[1,1,640,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f16,ne=[5120,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f16,ne=[5120,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f16,ne=[5120,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f16,ne=[5120,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f16,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f16,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f16,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f16,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f32,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f32,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f32,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[1,1,1,1],nr=[32,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f32,ne=[1,1,1,1],nr=[32,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f32,ne=[1,1,1,1],nr=[32,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f32,ne=[1,1,1,1],nr=[32,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[1,1,320,320],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f32,ne=[1,1,320,320],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f32,ne=[1,1,320,320],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f32,ne=[1,1,320,320],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[10,5,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f32,ne=[10,5,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f32,ne=[10,5,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f32,ne=[10,5,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[10,5,4,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f32,ne=[10,5,4,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f32,ne=[10,5,4,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f32,ne=[10,5,4,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[2,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[2,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[2,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[2,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,2,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[1,2,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[1,2,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[1,2,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,2,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[1,1,2,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[1,1,2,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[1,1,2,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,1,2],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[1,1,1,2],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[1,1,1,2],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[1,1,1,2],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,2,2],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[1,1,2,2],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[1,1,2,2],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[1,1,2,2],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,2,2,2],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[1,2,2,2],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[1,2,2,2],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[1,2,2,2],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[1280,1,1,1],nr=[1,16,16,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f32,ne=[1280,1,1,1],nr=[1,16,16,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f32,ne=[1280,1,1,1],nr=[1,16,16,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f32,ne=[1280,1,1,1],nr=[1,16,16,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[1280,16,16,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f32,ne=[1280,16,16,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f32,ne=[1280,16,16,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f32,ne=[1280,16,16,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[1280,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f32,ne=[1280,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f32,ne=[1280,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f32,ne=[1280,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[1,1,1280,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f32,ne=[1,1,1280,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f32,ne=[1,1,1280,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f32,ne=[1,1,1280,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[16,16,1280,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f32,ne=[16,16,1280,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f32,ne=[16,16,1280,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f32,ne=[16,16,1280,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[1,1,1920,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f32,ne=[1,1,1920,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f32,ne=[1,1,1920,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f32,ne=[1,1,1920,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[1,1,2560,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f32,ne=[1,1,2560,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f32,ne=[1,1,2560,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f32,ne=[1,1,2560,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[1,1,1280,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f32,ne=[1,1,1280,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f32,ne=[1,1,1280,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f32,ne=[1,1,1280,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[1,1,1920,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f32,ne=[1,1,1920,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f32,ne=[1,1,1920,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f32,ne=[1,1,1920,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[1,1,640,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f32,ne=[1,1,640,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f32,ne=[1,1,640,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f32,ne=[1,1,640,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[5120,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f32,ne=[5120,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f32,ne=[5120,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f32,ne=[5120,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","SUB","type=f32,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","MUL","type=f32,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","DIV","type=f32,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[2,1,1,1],nf=2","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[16,5,4,3],nr=[1,2,1,1],nf=3","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,2,1],nf=4","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[16,5,4,3],nr=[1,1,1,2],nf=5","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,2,2],nf=6","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,2,2,2],nf=7","support","1","yes","CUDA" +"CUDA0","ADD","type=f32,ne=[16,5,4,3],nr=[2,2,2,2],nf=8","support","1","yes","CUDA" "CUDA0","ADD1","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA" -"CUDA0","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=1.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=1.000000,inplace=1","support","1","yes","CUDA" -"CUDA0","SCALE","type=f32,ne=[100,10,10,10],scale=2.000000,bias=1.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=0.000000","support","1","yes","CUDA" +"CUDA0","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=1.000000","support","1","yes","CUDA" "CUDA0","SILU_BACK","type=f32,ne=[64,5,4,3],eps=0.000001","support","1","yes","CUDA" "CUDA0","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000000","support","1","yes","CUDA" -"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000000","support","1","yes","CUDA" "CUDA0","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000000","support","1","yes","CUDA" -"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000000","support","1","yes","CUDA" "CUDA0","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.000000","support","1","yes","CUDA" "CUDA0","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","CUDA" "CUDA0","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000001","support","1","yes","CUDA" -"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000001,inplace=0","support","1","yes","CUDA" +"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000001","support","1","yes","CUDA" "CUDA0","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000001","support","1","yes","CUDA" -"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000001,inplace=0","support","1","yes","CUDA" +"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000001","support","1","yes","CUDA" "CUDA0","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.000001","support","1","yes","CUDA" "CUDA0","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","CUDA" "CUDA0","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000100","support","1","yes","CUDA" -"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000100,inplace=0","support","1","yes","CUDA" +"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000100","support","1","yes","CUDA" "CUDA0","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000100","support","1","yes","CUDA" -"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000100,inplace=0","support","1","yes","CUDA" +"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000100","support","1","yes","CUDA" "CUDA0","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.000100","support","1","yes","CUDA" "CUDA0","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","CUDA" "CUDA0","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.100000","support","1","yes","CUDA" -"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.100000,inplace=0","support","1","yes","CUDA" +"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.100000","support","1","yes","CUDA" "CUDA0","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.100000","support","1","yes","CUDA" -"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.100000,inplace=0","support","1","yes","CUDA" +"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.100000","support","1","yes","CUDA" "CUDA0","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.100000","support","1","yes","CUDA" "CUDA0","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","CUDA" -"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000001,inplace=1","support","1","yes","CUDA" +"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000000,broadcast=0","support","1","yes","CUDA" +"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000000,broadcast=1","support","1","yes","CUDA" +"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000001,broadcast=0","support","1","yes","CUDA" +"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000001,broadcast=1","support","1","yes","CUDA" +"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000100,broadcast=0","support","1","yes","CUDA" +"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000100,broadcast=1","support","1","yes","CUDA" +"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.100000,broadcast=0","support","1","yes","CUDA" +"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.100000,broadcast=1","support","1","yes","CUDA" +"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=1.000000,broadcast=0","support","1","yes","CUDA" +"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=1.000000,broadcast=1","support","1","yes","CUDA" "CUDA0","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","CUDA" "CUDA0","SSM_CONV","type=f32,ne_a=[4,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","CUDA" "CUDA0","SSM_CONV","type=f32,ne_a=[8,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","CUDA" @@ -5479,1851 +2512,1243 @@ "CUDA0","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=1","support","1","yes","CUDA" "CUDA0","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=4","support","1","yes","CUDA" "CUDA0","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=128,n_seqs=4","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","0","no","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=1,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=64,n=2,k=128,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=83,n=2,k=128,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=64,n=2,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=83,n=2,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=64,n=45,k=128,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=45,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=193,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=67,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=64,n=77,k=77,bs=[12,1],nr=[1,1],per=[0,1,2,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],v=0,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],v=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=0,m=32,n=1024,k=16,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=2,n_used=2,b=0,m=32,n=8192,k=64,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=0,m=50,n=200,k=64,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=1,m=32,n=1024,k=16,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=2,n_used=2,b=1,m=32,n=8192,k=64,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=1,m=50,n=200,k=64,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=1,n_used=1,b=0,m=8,n=16,k=1,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=32,n_used=2,b=0,m=2880,n=32,k=2880,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q5_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q5_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q5_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q5_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q2_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q2_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q3_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q3_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q5_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q5_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q6_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=q6_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq2_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq3_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq3_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq1_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq1_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq1_m,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq1_m,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq4_nl,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq4_nl,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq3_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq3_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq4_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=iq4_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=bf16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256,o=1","support","1","yes","CUDA" -"CUDA0","MUL_MAT_ID","type_a=bf16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256,o=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=1,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=64,n=2,k=128,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=83,n=2,k=128,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=64,n=2,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=83,n=2,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=64,n=45,k=128,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=45,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=193,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=67,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA" +"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=0,m=32,n=1024,k=16","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=1,m=32,n=1024,k=16","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q5_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q5_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q5_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q5_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q2_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q2_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q3_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q3_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q5_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q5_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q6_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=q6_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq2_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq3_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq3_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq1_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq1_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq1_m,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq1_m,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq4_nl,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq4_nl,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq3_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq3_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq4_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=iq4_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=bf16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA" +"CUDA0","MUL_MAT_ID","type_a=bf16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA" "CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","1","yes","CUDA" "CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","1","yes","CUDA" "CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","1","yes","CUDA" @@ -8092,134 +4517,6 @@ "CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA" "CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA" "CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA" -"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA" "CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA" "CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA" "CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA" @@ -8348,796 +4645,548 @@ "CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA" "CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA" "CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=1,n_token=1","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=1,n_token=32","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=1,n_token=129","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=1,n_token=1","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=1,n_token=32","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=1,n_token=129","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=2,n_token=1","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=2,n_token=32","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=2,n_token=129","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=2,n_token=1","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=2,n_token=32","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=2,n_token=129","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=4,n_token=1","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=4,n_token=32","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=4,n_token=129","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=4,n_token=1","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=4,n_token=32","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=4,n_token=129","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=1,n_token=1","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=1,n_token=32","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=1,n_token=129","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=1,n_token=1","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=1,n_token=32","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=1,n_token=129","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=2,n_token=1","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=2,n_token=32","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=2,n_token=129","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=2,n_token=1","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=2,n_token=32","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=2,n_token=129","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=4,n_token=1","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=4,n_token=32","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=4,n_token=129","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=4,n_token=1","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=4,n_token=32","support","1","yes","CUDA" -"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=4,n_token=129","support","1","yes","CUDA" "CUDA0","SQR","type=f16,ne=[10,5,4,3]","support","1","yes","CUDA" "CUDA0","SQRT","type=f16,ne=[10,3,3,2]","support","1","yes","CUDA" "CUDA0","LOG","type=f16,ne=[10,5,4,3]","support","1","yes","CUDA" "CUDA0","SIN","type=f16,ne=[10,2,2,2]","support","1","yes","CUDA" "CUDA0","COS","type=f16,ne=[10,2,2,2]","support","1","yes","CUDA" "CUDA0","CLAMP","type=f16,ne=[10,5,4,3],min=-0.500000,max=0.500000","support","1","yes","CUDA" -"CUDA0","LEAKY_RELU","type=f16,ne_a=[10,5,4,3],negative_slope=0.100000","support","1","yes","CUDA" -"CUDA0","FLOOR","type=f16,ne=[10,2,2,2]","support","1","yes","CUDA" -"CUDA0","CEIL","type=f16,ne=[10,2,2,2]","support","1","yes","CUDA" -"CUDA0","ROUND","type=f16,ne=[10,2,2,2]","support","1","yes","CUDA" -"CUDA0","TRUNC","type=f16,ne=[10,2,2,2]","support","1","yes","CUDA" -"CUDA0","SQR","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA" -"CUDA0","SQRT","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA" -"CUDA0","LOG","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA" -"CUDA0","SIN","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA" -"CUDA0","COS","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA" -"CUDA0","CLAMP","type=f16,ne=[7,1,5,3],min=-0.500000,max=0.500000","support","1","yes","CUDA" -"CUDA0","LEAKY_RELU","type=f16,ne_a=[7,1,5,3],negative_slope=0.100000","support","1","yes","CUDA" -"CUDA0","FLOOR","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA" -"CUDA0","CEIL","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA" -"CUDA0","ROUND","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA" -"CUDA0","TRUNC","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA" "CUDA0","SQR","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA" "CUDA0","SQRT","type=f32,ne=[10,3,3,2]","support","1","yes","CUDA" "CUDA0","LOG","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA" "CUDA0","SIN","type=f32,ne=[10,2,2,2]","support","1","yes","CUDA" "CUDA0","COS","type=f32,ne=[10,2,2,2]","support","1","yes","CUDA" "CUDA0","CLAMP","type=f32,ne=[10,5,4,3],min=-0.500000,max=0.500000","support","1","yes","CUDA" -"CUDA0","LEAKY_RELU","type=f32,ne_a=[10,5,4,3],negative_slope=0.100000","support","1","yes","CUDA" -"CUDA0","FLOOR","type=f32,ne=[10,2,2,2]","support","1","yes","CUDA" -"CUDA0","CEIL","type=f32,ne=[10,2,2,2]","support","1","yes","CUDA" -"CUDA0","ROUND","type=f32,ne=[10,2,2,2]","support","1","yes","CUDA" -"CUDA0","TRUNC","type=f32,ne=[10,2,2,2]","support","1","yes","CUDA" -"CUDA0","SQR","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA" -"CUDA0","SQRT","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA" -"CUDA0","LOG","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA" -"CUDA0","SIN","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA" -"CUDA0","COS","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA" -"CUDA0","CLAMP","type=f32,ne=[7,1,5,3],min=-0.500000,max=0.500000","support","1","yes","CUDA" -"CUDA0","LEAKY_RELU","type=f32,ne_a=[7,1,5,3],negative_slope=0.100000","support","1","yes","CUDA" -"CUDA0","FLOOR","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA" -"CUDA0","CEIL","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA" -"CUDA0","ROUND","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA" -"CUDA0","TRUNC","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA" "CUDA0","DIAG_MASK_INF","type=f32,ne=[10,10,1,1],n_past=5","support","1","yes","CUDA" "CUDA0","DIAG_MASK_INF","type=f32,ne=[10,10,3,1],n_past=5","support","1","yes","CUDA" "CUDA0","DIAG_MASK_INF","type=f32,ne=[10,10,3,2],n_past=5","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=1","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=0,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=1","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=1","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=0,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=1","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=1","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=1","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=1","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=1","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" -"CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" +"CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" -"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" -"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,2,3],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" -"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,2,3],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" -"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,2,3],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" -"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,2,3],scale=1.000000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" -"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,2,3],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" -"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,2,3],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" -"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,2,3],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" "CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" -"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,2,3],scale=0.100000,max_bias=8.000000","support","0","no","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=24,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=24,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=24,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CUDA" -"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=24,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA" +"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA" "CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=0","support","1","yes","CUDA" "CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=0","support","0","no","CUDA" "CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=0","support","1","yes","CUDA" @@ -9173,15 +5222,9 @@ "CUDA0","ARGSORT","type=f32,ne=[8,1,1,1],order=0","support","1","yes","CUDA" "CUDA0","ARGSORT","type=f32,ne=[16,10,10,10],order=0","support","1","yes","CUDA" "CUDA0","ARGSORT","type=f32,ne=[60,10,10,10],order=0","support","1","yes","CUDA" -"CUDA0","ARGSORT","type=f32,ne=[1024,1,1,1],order=0","support","1","yes","CUDA" -"CUDA0","ARGSORT","type=f32,ne=[16384,1,1,1],order=0","support","1","yes","CUDA" -"CUDA0","ARGSORT","type=f32,ne=[2,8,8192,1],order=0","support","1","yes","CUDA" "CUDA0","ARGSORT","type=f32,ne=[8,1,1,1],order=1","support","1","yes","CUDA" "CUDA0","ARGSORT","type=f32,ne=[16,10,10,10],order=1","support","1","yes","CUDA" "CUDA0","ARGSORT","type=f32,ne=[60,10,10,10],order=1","support","1","yes","CUDA" -"CUDA0","ARGSORT","type=f32,ne=[1024,1,1,1],order=1","support","1","yes","CUDA" -"CUDA0","ARGSORT","type=f32,ne=[16384,1,1,1],order=1","support","1","yes","CUDA" -"CUDA0","ARGSORT","type=f32,ne=[2,8,8192,1],order=1","support","1","yes","CUDA" "CUDA0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=0","support","1","yes","CUDA" "CUDA0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=1","support","1","yes","CUDA" "CUDA0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=0","support","1","yes","CUDA" @@ -9191,8845 +5234,2116 @@ "CUDA0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=1","support","1","yes","CUDA" "CUDA0","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=1","support","1","yes","CUDA" "CUDA0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=257","support","1","yes","CUDA" -"CUDA0","UPSCALE","type=f32,ne=[1,4,3,2],ne_tgt=[2,8,3,2],mode=257","support","1","yes","CUDA" -"CUDA0","UPSCALE","type=f32,ne=[4,1,3,2],ne_tgt=[1,1,3,2],mode=257","support","1","yes","CUDA" "CUDA0","SUM","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA" -"CUDA0","SUM_ROWS","type=f32,ne=[10,5,4,3],permute=0,slice=0","support","1","yes","CUDA" -"CUDA0","SUM","type=f32,ne=[11,5,6,3],permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","SUM","type=f32,ne=[11,5,6,3],permute=[0,3,2,1]","support","1","yes","CUDA" -"CUDA0","SUM","type=f32,ne=[11,5,6,3],permute=[0,1,3,2]","support","1","yes","CUDA" -"CUDA0","SUM_ROWS","type=f32,ne=[11,5,6,3],permute=1,slice=0","support","0","no","CUDA" -"CUDA0","SUM_ROWS","type=f32,ne=[11,5,6,3],permute=0,slice=1","support","0","no","CUDA" -"CUDA0","SUM_ROWS","type=f32,ne=[11,5,6,3],permute=1,slice=1","support","0","no","CUDA" +"CUDA0","SUM_ROWS","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA" "CUDA0","MEAN","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA" -"CUDA0","SUM","type=f32,ne=[33,1,1,1]","support","1","yes","CUDA" -"CUDA0","SUM_ROWS","type=f32,ne=[33,1,1,1],permute=0,slice=0","support","1","yes","CUDA" -"CUDA0","MEAN","type=f32,ne=[33,1,1,1]","support","1","yes","CUDA" -"CUDA0","SUM","type=f32,ne=[33,1024,1,1]","support","1","yes","CUDA" -"CUDA0","SUM_ROWS","type=f32,ne=[33,1024,1,1],permute=0,slice=0","support","1","yes","CUDA" -"CUDA0","SUM","type=f32,ne=[33,256,1,1]","support","1","yes","CUDA" -"CUDA0","SUM","type=f32,ne=[33,256,1,1],permute=[1,0,2,3]","support","0","no","CUDA" -"CUDA0","SUM_ROWS","type=f32,ne=[33,256,1,1],permute=0,slice=0","support","1","yes","CUDA" -"CUDA0","MEAN","type=f32,ne=[33,256,1,1]","support","1","yes","CUDA" -"CUDA0","MEAN","type=f32,ne=[32769,1,1,1]","support","1","yes","CUDA" "CUDA0","GROUP_NORM","type=f32,ne=[64,64,320,1],num_groups=32,eps=0.000001","support","1","yes","CUDA" "CUDA0","GROUP_NORM","type=f32,ne=[9,9,1280,1],num_groups=32,eps=0.000001","support","1","yes","CUDA" "CUDA0","ACC","type=f32,ne_a=[256,17,1,1],ne_b=[256,16,1,1]","support","1","yes","CUDA" "CUDA0","PAD","type=f32,ne_a=[512,512,1,1],pad_0=1,pad_1=1","support","1","yes","CUDA" -"CUDA0","PAD","type=f32,ne_a=[512,512,3,1],lp0=1,rp0=1,lp1=1,rp1=1,lp2=1,rp2=1,lp3=1,rp3=1,v=0","support","1","yes","CUDA" -"CUDA0","PAD_REFLECT_1D","type=f32,ne_a=[512,34,2,1],pad_0=10,pad_1=9","support","1","yes","CUDA" -"CUDA0","PAD_REFLECT_1D","type=f32,ne_a=[3000,384,4,1],pad_0=10,pad_1=9","support","1","yes","CUDA" -"CUDA0","ROLL","shift0=3,shift1=-2,shift3=1,shift4=-1","support","1","yes","CUDA" +"CUDA0","PAD_REFLECT_1D","type=f32,ne_a=[512,34,2,1],pad_0=10,pad_1=9","support","0","no","CUDA" +"CUDA0","ROLL","shift0=3,shift1=-2,shift3=1,shift4=-1","support","0","no","CUDA" "CUDA0","ARANGE","type=f32,start=0.000000,stop=10.000000,step=1.000000","support","1","yes","CUDA" "CUDA0","TIMESTEP_EMBEDDING","type=f32,ne_a=[2,1,1,1],dim=320,max_period=10000","support","1","yes","CUDA" "CUDA0","LEAKY_RELU","type=f32,ne_a=[10,5,4,3],negative_slope=0.100000","support","1","yes","CUDA" -"CUDA0","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0","support","1","yes","CUDA" -"CUDA0","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0","support","1","yes","CUDA" -"CUDA0","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1","support","0","no","CUDA" -"CUDA0","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=1","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" -"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA" +"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA" "CUDA0","CROSS_ENTROPY_LOSS","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA" "CUDA0","CROSS_ENTROPY_LOSS","type=f32,ne=[30000,1,1,1]","support","1","yes","CUDA" "CUDA0","CROSS_ENTROPY_LOSS_BACK","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA" "CUDA0","CROSS_ENTROPY_LOSS_BACK","type=f32,ne=[30000,1,1,1]","support","1","yes","CUDA" "CUDA0","OPT_STEP_ADAMW","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA" -"CUDA0","OPT_STEP_SGD","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA" From 919dd248f7abf99d42086ac75b5ec1942925721d Mon Sep 17 00:00:00 2001 From: mnehete32 Date: Tue, 4 Nov 2025 23:16:15 +0530 Subject: [PATCH 5/5] docs: only updating ops added by me --- docs/ops.md | 10 +- docs/ops/CUDA.csv | 1620 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1625 insertions(+), 5 deletions(-) diff --git a/docs/ops.md b/docs/ops.md index 3738a48072832..b8fcf0463581d 100644 --- a/docs/ops.md +++ b/docs/ops.md @@ -22,11 +22,11 @@ Legend: | ARANGE | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | | ARGMAX | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | ARGSORT | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | -| CEIL | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | +| CEIL | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | ✅ | ❌ | ❌ | | CLAMP | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | 🟡 | ❌ | | CONCAT | ❌ | ✅ | ✅ | 🟡 | ✅ | 🟡 | 🟡 | ✅ | ❌ | | CONT | ❌ | 🟡 | ✅ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | ❌ | -| CONV_2D | ❌ | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | ✅ | ❌ | +| CONV_2D | ❌ | ❌ | ✅ | 🟡 | ❌ | ✅ | ❌ | ✅ | ❌ | | CONV_2D_DW | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | | CONV_3D | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | | CONV_TRANSPOSE_1D | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | @@ -42,7 +42,7 @@ Legend: | ELU | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ | | EXP | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ | | FLASH_ATTN_EXT | ❌ | 🟡 | ✅ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ❌ | -| FLOOR | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | +| FLOOR | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | ✅ | ❌ | ❌ | | GATED_LINEAR_ATTN | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | | GEGLU | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ | | GEGLU_ERF | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ | @@ -84,7 +84,7 @@ Legend: | ROLL | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | | ROPE | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | | ROPE_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | -| ROUND | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | +| ROUND | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | ✅ | ❌ | ❌ | | RWKV_WKV6 | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | RWKV_WKV7 | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | SCALE | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | @@ -111,6 +111,6 @@ Legend: | TANH | ❌ | ✅ | ✅ | 🟡 | 🟡 | ✅ | 🟡 | 🟡 | ❌ | | TIMESTEP_EMBEDDING | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | | TOPK_MOE | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | -| TRUNC | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | +| TRUNC | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | ✅ | ❌ | ❌ | | UPSCALE | ❌ | 🟡 | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | ❌ | | XIELU | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | diff --git a/docs/ops/CUDA.csv b/docs/ops/CUDA.csv index 71e47977e31d1..4589bb51e0ecd 100644 --- a/docs/ops/CUDA.csv +++ b/docs/ops/CUDA.csv @@ -7347,3 +7347,1623 @@ "CUDA0","CROSS_ENTROPY_LOSS_BACK","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA" "CUDA0","CROSS_ENTROPY_LOSS_BACK","type=f32,ne=[30000,1,1,1]","support","1","yes","CUDA" "CUDA0","OPT_STEP_ADAMW","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA" +"CUDA0","CEIL","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" +"CUDA0","CEIL","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" +"CUDA0","CEIL","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" +"CUDA0","CEIL","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" +"CUDA0","CEIL","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" +"CUDA0","CEIL","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" +"CUDA0","CEIL","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" +"CUDA0","CEIL","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" +"CUDA0","CEIL","type=f16,ne=[10,2,2,2]","support","1","yes","CUDA" +"CUDA0","CEIL","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA" +"CUDA0","CEIL","type=f32,ne=[10,2,2,2]","support","1","yes","CUDA" +"CUDA0","CEIL","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA" +"CUDA0","FLOOR","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" +"CUDA0","FLOOR","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" +"CUDA0","FLOOR","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" +"CUDA0","FLOOR","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" +"CUDA0","FLOOR","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" +"CUDA0","FLOOR","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" +"CUDA0","FLOOR","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" +"CUDA0","FLOOR","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" +"CUDA0","FLOOR","type=f16,ne=[10,2,2,2]","support","1","yes","CUDA" +"CUDA0","FLOOR","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA" +"CUDA0","FLOOR","type=f32,ne=[10,2,2,2]","support","1","yes","CUDA" +"CUDA0","FLOOR","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA" +"CUDA0","ROUND","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" +"CUDA0","ROUND","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" +"CUDA0","ROUND","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" +"CUDA0","ROUND","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" +"CUDA0","ROUND","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" +"CUDA0","ROUND","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" +"CUDA0","ROUND","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" +"CUDA0","ROUND","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" +"CUDA0","ROUND","type=f16,ne=[10,2,2,2]","support","1","yes","CUDA" +"CUDA0","ROUND","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA" +"CUDA0","ROUND","type=f32,ne=[10,2,2,2]","support","1","yes","CUDA" +"CUDA0","ROUND","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA" +"CUDA0","TRUNC","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" +"CUDA0","TRUNC","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" +"CUDA0","TRUNC","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" +"CUDA0","TRUNC","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" +"CUDA0","TRUNC","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" +"CUDA0","TRUNC","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" +"CUDA0","TRUNC","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" +"CUDA0","TRUNC","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" +"CUDA0","TRUNC","type=f16,ne=[10,2,2,2]","support","1","yes","CUDA" +"CUDA0","TRUNC","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA" +"CUDA0","TRUNC","type=f32,ne=[10,2,2,2]","support","1","yes","CUDA" +"CUDA0","TRUNC","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D_DW","ne_input=[17,34,9,1],ne_kernel=[3,3,1,9],stride=1,padding=0,dilation=1,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D_DW","ne_input=[17,34,9,1],ne_kernel=[3,3,1,9],stride=1,padding=0,dilation=1,cwhn=1","support","1","yes","CUDA" +"CUDA0","CONV_2D_DW","ne_input=[32,8,64,1],ne_kernel=[3,3,1,64],stride=2,padding=1,dilation=1,cwhn=0","support","1","yes","CUDA" +"CUDA0","CONV_2D_DW","ne_input=[32,8,64,1],ne_kernel=[3,3,1,64],stride=2,padding=1,dilation=1,cwhn=1","support","1","yes","CUDA"